adrien.aribaut-gaudin committed on
Commit
498db6b
1 Parent(s): 47ca6bf

feat: new public GenProp

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.docx filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ __pycache__
2
+ venv1
3
+ test/files_to_test/*
4
+ config_key.py
5
+ test
6
+ .env
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Application entry point: wires the controller to the Gradio view and serves it."""

from config import config
from src.control.controller import Controller
import src.view.view as view
import chromadb
from src.retriever.retriever import Retriever


# In-memory Chroma client handed to the controller for vector storage.
client_db = chromadb.Client()

# Assemble the controller from the global config, the vector DB client
# and a freshly constructed retriever.
retriever = Retriever()
ctrl = Controller(config, client_db, retriever=retriever)

# Build the Gradio app around the controller, then serve it with
# request queuing enabled.
app = view.run(controller=ctrl, config=config)
queued_app = app.queue()
queued_app.launch()
config.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Static application configuration plus discovery of the shipped .docx templates."""

import os

config = {
    # Directory scanned below to build the 'templates' list.
    'templates_path': 'data/templates',
    'these_docs_path': 'data/examples/',
    'new_docs_path': 'data/examples/',
    # Index into the 'templates' list used when no template is selected.
    'default_template_index': 0,
    'styled_docs_path': 'temp/styles_files',
    'generated_docs_path': 'temp/generated_files',
    'options': ["Recentrer les tableaux", "Justifier le texte (Normal)"],
    'max_styles': 300,
    # French log-message fragments assembled elsewhere into user-facing reports.
    'log_msg': {
        'options_applied': 'Les options suivantes ont été appliquées : \n',
        'suppressed_styles': 'Les styles suivants ont été supprimés : \n',
        'modified_styles': 'Les styles suivants ont été modifiés : \n',
        'added_styles': 'Les styles suivants ont été ajoutés :\n',
        'modified_style': ' - ',
        'color': ' la couleur,',
        'font size': ' la taille de la fonte,',
        'font': ' la fonte,',
        'all_caps': ' les majuscules,',
        'bold': 'le caractère gras',
        'document': '\n============================\n Sur le document : ',
    },
}

# Enumerate every Word template present in the templates directory
# (directory-listing order is preserved, as the default index relies on it).
templates = [entry for entry in os.listdir(config['templates_path'])
             if entry.endswith(".docx")]
config['templates'] = templates
data/doc.xml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:oel="http://schemas.microsoft.com/office/2019/extlst" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" 
xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh wp14">
2
+ <w:body>
3
+ <w:tbl>
4
+ <w:tblPr>
5
+ <w:tblpPr w:leftFromText="141" w:rightFromText="141" w:vertAnchor="page" w:tblpY="3001"/>
6
+ <w:tblOverlap w:val="never"/>
7
+ <w:tblW w:w="7597" w:type="dxa"/>
8
+ <w:tblLayout w:type="fixed"/>
9
+ <w:tblCellMar><w:left w:w="0" w:type="dxa"/>
10
+ <w:right w:w="0" w:type="dxa"/>
11
+ </w:tblCellMar><w:tblLook w:val="0000" w:firstRow="0" w:lastRow="0" w:firstColumn="0" w:lastColumn="0" w:noHBand="0" w:noVBand="0"/>
12
+ </w:tblPr>
13
+ <w:tblGrid>
14
+ <w:gridCol w:w="7597"/>
15
+ </w:tblGrid>
16
+ <w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="58F81C33" w14:textId="77777777" w:rsidTr="00C4517C">
17
+ <w:trPr><w:cantSplit/>
18
+ <w:trHeight w:hRule="exact" w:val="397"/>
19
+ </w:trPr><w:tc>
20
+ <w:tcPr>
21
+ <w:tcW w:w="7597" w:type="dxa"/>
22
+ <w:shd w:val="clear" w:color="auto" w:fill="auto"/>
23
+ </w:tcPr><w:p w14:paraId="18CBAEA8" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C">
24
+ <w:pPr><w:pStyle w:val="BodyText"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>R&#233;ponse &#224;</w:t></w:r></w:p></w:tc>
25
+ </w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="6B21A2A2" w14:textId="77777777" w:rsidTr="00C4517C">
26
+ <w:trPr><w:cantSplit/>
27
+ <w:trHeight w:hRule="exact" w:val="851"/>
28
+ </w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/>
29
+ <w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr>
30
+ <w:p w14:paraId="415EC112" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00DE680A">
31
+ <w:pPr><w:pStyle w:val="CompanyName"/>
32
+ <w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/>
33
+ <w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE">
34
+ <w:t>Nom du Client</w:t>
35
+ </w:r>
36
+ </w:p>
37
+ </w:tc>
38
+ </w:tr>
39
+ <w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="4CECECE0" w14:textId="77777777" w:rsidTr="00C4517C">
40
+ <w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="397"/>
41
+ </w:trPr>
42
+ <w:tc>
43
+ <w:tcPr>
44
+ <w:tcW w:w="7597" w:type="dxa"/>
45
+ <w:shd w:val="clear" w:color="auto" w:fill="auto"/>
46
+ </w:tcPr><w:p w14:paraId="04690B8E" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr><w:proofErr w:type="gramStart"/><w:r w:rsidRPr="00C335CE"><w:t>pour</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> le</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="10E37A3B" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="1871"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr><w:p w14:paraId="3848203F" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00011EBE"><w:pPr><w:pStyle w:val="ProjectNumber"/><w:framePr w:hSpace="0" w:wrap="auto" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Style pour cette page seulement (non recopi&#233; en en-t&#234;te)</w:t></w:r></w:p><w:p w14:paraId="4209E6AE" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00011EBE"><w:pPr><w:pStyle w:val="ProjectName"/><w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/><w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Nom du projet</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="5CF53648" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="397"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/></w:tcPr><w:p w14:paraId="01D4D4D2" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Date de 
remise</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="47C4D541" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="397"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr><w:p w14:paraId="463058A0" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="DateDue"/><w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/><w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>JJ/MM/AAAA</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="008F20A4" w:rsidRPr="00C335CE" w14:paraId="6452573F" w14:textId="77777777" w:rsidTr="00C4517C"><w:trPr><w:cantSplit/><w:trHeight w:hRule="exact" w:val="340"/></w:trPr><w:tc><w:tcPr><w:tcW w:w="7597" w:type="dxa"/><w:shd w:val="clear" w:color="auto" w:fill="auto"/><w:vAlign w:val="bottom"/></w:tcPr><w:p w14:paraId="6536045D" w14:textId="77777777" w:rsidR="008F20A4" w:rsidRPr="00C335CE" w:rsidRDefault="008F20A4" w:rsidP="00C4517C"><w:pPr><w:pStyle w:val="Classification"/><w:framePr w:hSpace="0" w:wrap="auto" w:vAnchor="margin" w:hAnchor="text" w:xAlign="left" w:yAlign="inline"/><w:suppressOverlap w:val="0"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:sz w:val="18"/></w:rPr><w:t>Strictement confidentiel</w:t></w:r></w:p></w:tc></w:tr></w:tbl><w:p w14:paraId="45EA0891" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="005A01BC" w:rsidP="00891B8F"><w:pPr><w:pStyle w:val="documentControl"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:noProof/><w:lang w:eastAsia="fr-FR"/></w:rPr><w:drawing><wp:anchor distT="0" distB="0" distL="114300" distR="114300" simplePos="0" relativeHeight="251659264" behindDoc="1" locked="0" layoutInCell="0" allowOverlap="0" wp14:anchorId="4A040FE9" 
wp14:editId="18E34965"><wp:simplePos x="0" y="0"/><wp:positionH relativeFrom="margin"><wp:align>right</wp:align></wp:positionH><wp:positionV relativeFrom="margin"><wp:align>bottom</wp:align></wp:positionV><wp:extent cx="6768000" cy="6786000"/><wp:effectExtent l="0" t="0" r="0" b="0"/><wp:wrapNone/><wp:docPr id="11" name="Image 11"/><wp:cNvGraphicFramePr><a:graphicFrameLocks xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" noChangeAspect="1"/></wp:cNvGraphicFramePr><a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"><a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"><pic:nvPicPr><pic:cNvPr id="0" name="Picture 2" descr="Page garde offre"/><pic:cNvPicPr><a:picLocks noChangeAspect="1" noChangeArrowheads="1"/></pic:cNvPicPr></pic:nvPicPr><pic:blipFill><a:blip r:embed="rId11"><a:extLst><a:ext uri="{28A0092B-C50C-407E-A947-70E740481C1C}"><a14:useLocalDpi xmlns:a14="http://schemas.microsoft.com/office/drawing/2010/main" val="0"/></a:ext></a:extLst></a:blip><a:stretch><a:fillRect/></a:stretch></pic:blipFill><pic:spPr bwMode="auto"><a:xfrm><a:off x="0" y="0"/><a:ext cx="6768000" cy="6786000"/></a:xfrm><a:prstGeom prst="rect"><a:avLst/></a:prstGeom><a:noFill/><a:ln><a:noFill/></a:ln></pic:spPr></pic:pic></a:graphicData></a:graphic><wp14:sizeRelH relativeFrom="page"><wp14:pctWidth>0</wp14:pctWidth></wp14:sizeRelH><wp14:sizeRelV relativeFrom="page"><wp14:pctHeight>0</wp14:pctHeight></wp14:sizeRelV></wp:anchor></w:drawing></w:r><w:r w:rsidRPr="00C335CE"><w:br w:type="page"/></w:r><w:proofErr w:type="gramStart"/><w:r w:rsidR="00B9348B" w:rsidRPr="00C335CE"><w:lastRenderedPageBreak/><w:t>clause</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidR="00B9348B" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> de confidentialit&#233;</w:t></w:r></w:p><w:p w14:paraId="3E57861B" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" 
w:rsidRDefault="00115837" w:rsidP="00891B8F"><w:pPr><w:pStyle w:val="ProprietaryNoticeText"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">Toute </w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">information </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">contenue dans ce </w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">document </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve">strictement confidentiel est fournie &#224; </w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:instrText xml:space="preserve"> STYLEREF .CompanyName \\* MERGEFORMAT </w:instrText></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="000C19BE"><w:rPr><w:noProof/><w:color w:val="595959"/></w:rPr><w:t>Nom du Client</w:t></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r><w:r w:rsidR="007132BD" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>dans le seul but de r&#233;pondre &#224; ses demandes et ne peut &#234;tre utilis&#233;</w:t></w:r><w:r w:rsidR="007A159B" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>e</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve"> &#224; d&#8217;autres fins.</w:t></w:r></w:p><w:p 
w14:paraId="3EC09512" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="007132BD"><w:pPr><w:pStyle w:val="ProprietaryNoticeText"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:instrText xml:space="preserve"> STYLEREF .CompanyName \\* MERGEFORMAT </w:instrText></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="000C19BE"><w:rPr><w:noProof/><w:color w:val="595959"/></w:rPr><w:t>Nom du Client</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00115837" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>s&#8217;engage &#224; ne pas publier ni faire conna&#238;tre tout ou partie de ces informations &#224; quelque tierce partie que ce soit sans l&#8217;autorisation pr&#233;alable d&#8217;</w:t></w:r><w:r w:rsidR="00712552" w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>Orange</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>.</w:t></w:r></w:p><w:p w14:paraId="7D23B684" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="007132BD"><w:pPr><w:pStyle w:val="ProprietaryNoticeText"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>&#169; copyright 201</w:t></w:r><w:r w:rsidR="00DA1A27"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>8</w:t></w:r></w:p><w:p w14:paraId="5E1DE421" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="00115837" w:rsidP="00C21D48"><w:pPr><w:pStyle 
w:val="ProprietaryNoticeText"/><w:spacing w:after="2800"/><w:rPr><w:color w:val="595959"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:color w:val="595959"/></w:rPr><w:t>Tous droits r&#233;serv&#233;s</w:t></w:r></w:p><w:p w14:paraId="3BC5C1B2" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00DA1A27"><w:pPr><w:pStyle w:val="PointsContact"/><w:spacing w:before="6000"/></w:pPr><w:proofErr w:type="gramStart"/><w:r w:rsidRPr="00C335CE"><w:t>votre</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> contact</w:t></w:r></w:p><w:tbl><w:tblPr><w:tblW w:w="8505" w:type="dxa"/><w:tblInd w:w="85" w:type="dxa"/><w:tblBorders><w:top w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:left w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:bottom w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:right w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:insideH w:val="single" w:sz="4" w:space="0" w:color="808080"/><w:insideV w:val="single" w:sz="4" w:space="0" w:color="808080"/></w:tblBorders><w:tblLayout w:type="fixed"/><w:tblCellMar><w:left w:w="85" w:type="dxa"/><w:right w:w="85" w:type="dxa"/></w:tblCellMar><w:tblLook w:val="0000" w:firstRow="0" w:lastRow="0" w:firstColumn="0" w:lastColumn="0" w:noHBand="0" w:noVBand="0"/></w:tblPr><w:tblGrid><w:gridCol w:w="1095"/><w:gridCol w:w="3264"/><w:gridCol w:w="900"/><w:gridCol w:w="3246"/></w:tblGrid><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="64609664" w14:textId="77777777" w:rsidTr="009564E8"><w:trPr><w:cantSplit/></w:trPr><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="2CA49F3C" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Nom</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t 
xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="7410" w:type="dxa"/><w:gridSpan w:val="3"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="427D84E8" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc></w:tr><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="04E8E560" w14:textId="77777777" w:rsidTr="009564E8"><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="6876598F" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Titre</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3264" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="1F56BDFC" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="900" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="5DC329C3" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00FF5DD0" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:proofErr w:type="gramStart"/><w:r w:rsidRPr="00C335CE"><w:t>Email</w:t></w:r><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3246" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="2326070B" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle 
w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>@orange.com</w:t></w:r></w:p></w:tc></w:tr><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="470FE1CB" w14:textId="77777777" w:rsidTr="009564E8"><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="75B44613" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>T&#233;l</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3264" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="1FF13B91" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="900" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="0486FC7A" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Mobile</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="3246" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="49907CC8" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc></w:tr><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="071A9502" w14:textId="77777777" w:rsidTr="009564E8"><w:trPr><w:cantSplit/></w:trPr><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="4755FA1E" w14:textId="77777777" w:rsidR="00AF69C7" 
w:rsidRPr="00C335CE" w:rsidRDefault="00D221F1" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>A</w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>dresse</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="7410" w:type="dxa"/><w:gridSpan w:val="3"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="486934C7" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00AF69C7"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr></w:p></w:tc></w:tr><w:tr w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w14:paraId="74B2F971" w14:textId="77777777" w:rsidTr="009564E8"><w:trPr><w:cantSplit/></w:trPr><w:tc><w:tcPr><w:tcW w:w="1095" w:type="dxa"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="21D3119F" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00AF69C7" w:rsidP="00FF5DD0"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Site Web</w:t></w:r><w:r w:rsidR="00FF5DD0" w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t>:</w:t></w:r></w:p></w:tc><w:tc><w:tcPr><w:tcW w:w="7410" w:type="dxa"/><w:gridSpan w:val="3"/><w:vAlign w:val="center"/></w:tcPr><w:p w14:paraId="3957B477" w14:textId="77777777" w:rsidR="00AF69C7" w:rsidRPr="00C335CE" w:rsidRDefault="00C168A5" w:rsidP="00C168A5"><w:pPr><w:pStyle w:val="CoordonnesContacts"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>http://www.</w:t></w:r><w:r w:rsidR="00AF69C7" w:rsidRPr="00C335CE"><w:t>orange-business.com</w:t></w:r></w:p></w:tc></w:tr></w:tbl><w:p w14:paraId="11185772" w14:textId="77777777" w:rsidR="008A617E" w:rsidRPr="00C335CE" w:rsidRDefault="008A617E" w:rsidP="008A617E"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr></w:p><w:p w14:paraId="6DDF5966" w14:textId="77777777" 
w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="008A617E"><w:pPr><w:pStyle w:val="BodyText"/><w:sectPr w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidSect="002F63F5"><w:headerReference w:type="even" r:id="rId12"/><w:headerReference w:type="default" r:id="rId13"/><w:footerReference w:type="even" r:id="rId14"/><w:footerReference w:type="default" r:id="rId15"/><w:headerReference w:type="first" r:id="rId16"/><w:footerReference w:type="first" r:id="rId17"/><w:pgSz w:w="11906" w:h="16838" w:code="9"/><w:pgMar w:top="720" w:right="720" w:bottom="720" w:left="720" w:header="0" w:footer="0" w:gutter="0"/><w:cols w:space="708"/><w:titlePg/><w:docGrid w:linePitch="360"/></w:sectPr></w:pPr></w:p><w:p w14:paraId="2C75859B" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="007132BD" w:rsidP="00C168A5"><w:pPr><w:pStyle w:val="STitre1"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:lastRenderedPageBreak/><w:t xml:space="preserve">Table </w:t></w:r><w:r w:rsidR="00FA453C" w:rsidRPr="00C335CE"><w:t>des</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:t xml:space="preserve"> </w:t></w:r><w:r w:rsidR="00FA453C" w:rsidRPr="00C335CE"><w:t>mati&#232;res</w:t></w:r></w:p><w:p w14:paraId="33936659" w14:textId="77777777" w:rsidR="007132BD" w:rsidRPr="00C335CE" w:rsidRDefault="00000000" w:rsidP="00F16138"><w:pPr><w:pStyle w:val="TM1"/></w:pPr><w:r><w:fldChar w:fldCharType="begin"/></w:r><w:r><w:instrText xml:space="preserve"> TOC \\o "1-3" \\h \\z \\u </w:instrText></w:r><w:r><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:rPr><w:noProof/></w:rPr><w:t xml:space="preserve">Aucune entr&#233;e de table des </w:t></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:t>mati&#232;res</w:t></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:rPr><w:noProof/></w:rPr><w:t xml:space="preserve"> n\'a &#233;t&#233; trouv&#233;e.</w:t></w:r><w:r><w:rPr><w:noProof/></w:rPr><w:fldChar w:fldCharType="end"/></w:r></w:p><w:p 
w14:paraId="38CA3268" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="0048016E"><w:pPr><w:pStyle w:val="STitre1"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Liste des tableaux</w:t></w:r></w:p><w:p w14:paraId="3FFCA3F6" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="002F4EEA"><w:pPr><w:pStyle w:val="Tabledesillustrations"/><w:rPr><w:lang w:val="fr-FR"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:instrText xml:space="preserve"> TOC \\h \\z \\c "Tableau" </w:instrText></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="002F4EEA" w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:t>Aucune entr&#233;e de table d\'illustration n\'a &#233;t&#233; trouv&#233;e.</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r></w:p><w:p w14:paraId="5A2AE896" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="0048016E"><w:pPr><w:pStyle w:val="STitre1"/></w:pPr><w:r w:rsidRPr="00C335CE"><w:t>Liste des figures</w:t></w:r></w:p><w:p w14:paraId="3B29A2A0" w14:textId="77777777" w:rsidR="009A37C6" w:rsidRPr="00C335CE" w:rsidRDefault="009A37C6" w:rsidP="002F4EEA"><w:pPr><w:pStyle w:val="Tabledesillustrations"/><w:rPr><w:lang w:val="fr-FR"/></w:rPr></w:pPr><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="begin"/></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:instrText xml:space="preserve"> TOC \\h \\z \\c "Figure" </w:instrText></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="separate"/></w:r><w:r w:rsidR="001F5250" w:rsidRPr="00C335CE"><w:rPr><w:lang 
w:val="fr-FR"/></w:rPr><w:t>Aucune entr&#233;e de table d\'illustration n\'a &#233;t&#233; trouv&#233;e.</w:t></w:r><w:r w:rsidRPr="00C335CE"><w:rPr><w:lang w:val="fr-FR"/></w:rPr><w:fldChar w:fldCharType="end"/></w:r></w:p><w:p w14:paraId="4750AFB0" w14:textId="77777777" w:rsidR="00F36D8E" w:rsidRPr="00C335CE" w:rsidRDefault="00F36D8E" w:rsidP="00F36D8E"/><w:p w14:paraId="7D70CB6D" w14:textId="77777777" w:rsidR="000836AE" w:rsidRPr="00C335CE" w:rsidRDefault="000836AE" w:rsidP="000836AE"><w:pPr><w:pStyle w:val="BodyText"/><w:sectPr w:rsidR="000836AE" w:rsidRPr="00C335CE" w:rsidSect="005302A5"><w:headerReference w:type="even" r:id="rId18"/><w:headerReference w:type="default" r:id="rId19"/><w:footerReference w:type="even" r:id="rId20"/><w:footerReference w:type="default" r:id="rId21"/><w:headerReference w:type="first" r:id="rId22"/><w:footerReference w:type="first" r:id="rId23"/><w:pgSz w:w="11906" w:h="16838" w:code="9"/><w:pgMar w:top="720" w:right="720" w:bottom="720" w:left="720" w:header="0" w:footer="0" w:gutter="0"/><w:cols w:space="708"/><w:docGrid w:linePitch="360"/></w:sectPr></w:pPr></w:p><w:p w14:paraId="6B39024A" w14:textId="77777777" w:rsidR="001E4CDD" w:rsidRPr="00ED1502" w:rsidRDefault="001E4CDD" w:rsidP="00ED1502"><w:pPr><w:pStyle w:val="BodyText"/></w:pPr></w:p><w:p w14:paraId="0519CE18" w14:textId="3F897D6A" w:rsidR="0048016E" w:rsidRDefault="00ED038F" w:rsidP="00ED038F"><w:pPr><w:pStyle w:val="Titre10"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:lastRenderedPageBreak/><w:t>Ccc</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="3BC69584" w14:textId="48227D67" w:rsidR="00ED038F" w:rsidRDefault="00ED038F" w:rsidP="00ED038F"><w:pPr><w:pStyle w:val="Titre20"/></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:t>Qsdd</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="747EE9A7" w14:textId="5DFB2DB0" w:rsidR="00ED038F" 
w:rsidRDefault="00947006" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Dsbvbvn</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="21497A0D" w14:textId="77777777" w:rsidR="00947006" w:rsidRPr="00845F4B" w:rsidRDefault="00947006" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr></w:p><w:p w14:paraId="383106B5" w14:textId="5ACDB52E" w:rsidR="00947006" w:rsidRDefault="00947006" w:rsidP="00947006"><w:pPr><w:pStyle w:val="Bullet1"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Fezjfz</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="2C331B80" w14:textId="3A7D6E23" w:rsidR="00947006" w:rsidRDefault="00947006" w:rsidP="00947006"><w:pPr><w:pStyle w:val="Bullet1"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:proofErr w:type="spellStart"/><w:r><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Jzekkfjk</w:t></w:r><w:proofErr w:type="spellEnd"/></w:p><w:p w14:paraId="603BA9F8" w14:textId="32546CFA" w:rsidR="00947006" w:rsidRPr="00845F4B" w:rsidRDefault="00845F4B" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:proofErr w:type="spellStart"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>Nf</w:t></w:r><w:proofErr w:type="spellEnd"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:proofErr w:type="spellStart"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>nvf</w:t></w:r><w:proofErr w:type="spellEnd"/><w:r 
w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:proofErr w:type="spellStart"/><w:proofErr w:type="gramStart"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>z,v</w:t></w:r><w:proofErr w:type="spellEnd"/><w:proofErr w:type="gramEnd"/><w:r w:rsidRPr="00845F4B"><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr><w:t>$</w:t></w:r></w:p><w:p w14:paraId="1E6020A0" w14:textId="77777777" w:rsidR="00845F4B" w:rsidRPr="00845F4B" w:rsidRDefault="00845F4B" w:rsidP="00845F4B"><w:pPr><w:pStyle w:val="BodyText"/><w:rPr><w:rStyle w:val="TexteOrange"/></w:rPr></w:pPr></w:p><w:sectPr w:rsidR="00845F4B" w:rsidRPr="00845F4B" w:rsidSect="00502252"><w:headerReference w:type="default" r:id="rId24"/><w:footerReference w:type="default" r:id="rId25"/><w:pgSz w:w="11906" w:h="16838" w:code="9"/><w:pgMar w:top="720" w:right="720" w:bottom="720" w:left="720" w:header="0" w:footer="0" w:gutter="0"/><w:cols w:space="708"/><w:docGrid w:linePitch="360"/></w:sectPr></w:body></w:document>
data/templates/Template_presentation.docx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d264f72e69de63159803b9676a6d28e30946b478151f802f135798a2a71f71
3
+ size 146771
requirements.txt ADDED
Binary file (5.38 kB). View file
 
src/control/controller.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from typing import Dict
4
+ import random
5
+ import datetime
6
+ import string
7
+ import docx
8
+ from src.tools.doc_tools import get_title
9
+ from src.domain.doc import Doc
10
+ from src.domain.wikidoc import WikiPage
11
+ from src.view.log_msg import create_msg_from
12
+ import src.tools.semantic_db as semantic_db
13
+ from src.tools.wiki import Wiki
14
+ from src.llm.llm_tools import get_wikilist, get_public_paragraph, get_private_paragraph
15
+ from src.tools.semantic_db import add_texts_to_collection, query_collection
16
+ import gradio as gr
17
+ from src.retriever.retriever import Retriever
18
+
19
+ class Controller:
20
+
21
+ def __init__(self, config: Dict, client_db, retriever):
22
+ self.templates_path = config['templates_path']
23
+ self.generated_docs_path = config['generated_docs_path']
24
+ self.styled_docs_path = config['styled_docs_path']
25
+ self.new_docs = []
26
+ self.gen_docs = []
27
+ self.input_csv = ""
28
+ template_path = config['templates_path'] + '/' + config['templates'][config['default_template_index']]
29
+ self.default_template = Doc(template_path)
30
+ self.template = self.default_template
31
+ self.log = []
32
+ self.differences = []
33
+ self.list_differences = []
34
+ self.client_db = client_db
35
+ self.retriever = retriever
36
+
37
+ def copy_docs(self, temp_docs: []):
38
+ """
39
+ Initial copy of the incoming document
40
+ +
41
+ create collection for requirments retrieval
42
+ +
43
+ Initiate paths
44
+
45
+ TODO: Rename or refactor the function -> 1 mission / function
46
+ TODO: To be tested on several documents
47
+ TODO: Rename create_collection in create_requirement_collection
48
+ """
49
+ doc_names = [doc.name for doc in temp_docs]
50
+ for i in range(len(doc_names)):
51
+ if '/' in doc_names[i]:
52
+ doc_names[i] = doc_names[i].split('/')[-1]
53
+ elif '\\' in doc_names[i]:
54
+ doc_names[i] = doc_names[i].split('\\')[-1]
55
+ doc_names[i] = doc_names[i].split('.')[0]
56
+ docs = [Doc(path=doc.name) for doc in temp_docs]
57
+ self.create_collection(docs)
58
+ style_paths = [f"{self.generated_docs_path}/{dn}_.docx" for dn in doc_names]
59
+ gen_paths = [f"{self.generated_docs_path}/{dn}_e.docx" for dn in doc_names]
60
+ for doc, style_path, gen_path in zip(docs, style_paths, gen_paths):
61
+ new_doc = doc.copy(style_path)
62
+ self.new_docs.append(new_doc)
63
+
64
+ def clear_docs(self):
65
+ for new_doc in self.new_docs:
66
+ if os.path.exists(new_doc.path):
67
+ new_doc.clear()
68
+ for gen_doc in self.gen_docs:
69
+ if os.path.exists(gen_doc.path):
70
+ gen_doc.clear()
71
+ self.new_docs = []
72
+ self.gen_docs = []
73
+ self.log = []
74
+ path_to_clear = os.path.abspath(self.generated_docs_path)
75
+ [os.remove(f"{path_to_clear}/{doc}") for doc in os.listdir(path_to_clear)]
76
+
77
+ def set_template(self, template_name: str = ""):
78
+ if not template_name:
79
+ self.template = self.default_template
80
+ else:
81
+ template_path = f"{self.templates_path}/{template_name}"
82
+ self.template = Doc(template_path)
83
+
84
+ def add_template(self, template_path: str):
85
+ """
86
+ TODO: message to be but in config
87
+ """
88
+ if not template_path:
89
+ return
90
+ elif not template_path.name.endswith(".docx"):
91
+ gr.Warning("Seuls les fichiers .docx sont acceptés")
92
+ return
93
+ doc = docx.Document(template_path.name)
94
+ doc.save(self.templates_path + '/' + get_title(template_path.name))
95
+
96
+ def delete_curr_template(self, template_name: str):
97
+ if not template_name:
98
+ return
99
+ os.remove(f"{self.templates_path}/{template_name}")
100
+
101
+ def retrieve_number_of_misapplied_styles(self):
102
+ """
103
+ not used: buggy !!
104
+ """
105
+ res = {}
106
+ for new_doc in self.new_docs:
107
+ res[new_doc] = new_doc.retrieve_number_of_misapplied_styles()
108
+ return res
109
+
110
+ def get_difference_with_template(self):
111
+ self.differences = []
112
+ for new_doc in self.new_docs:
113
+ diff_styles = new_doc.get_different_styles_with_template(template=self.template)
114
+ diff_dicts = [{'doc': new_doc, 'style': s} for s in diff_styles]
115
+ self.differences += diff_dicts
116
+ template_styles = self.template.xdoc.styles
117
+ template_styles = [style for style in template_styles if style.name in self.template.styles.names]
118
+ return self.differences, template_styles
119
+
120
+ def get_list_styles(self):
121
+ self.list_differences = []
122
+ for new_doc in self.new_docs:
123
+ list_styles = new_doc.get_list_styles()
124
+ all_lists_styles = [{'doc': new_doc, 'list_style': s} for s in list_styles]
125
+ self.list_differences += all_lists_styles
126
+ return self.list_differences
127
+
128
+ def map_style(self, this_style_index: int, template_style_name: str):
129
+ """
130
+ maps a style from 'this' document into a style from the template
131
+ """
132
+ #dont make any change if the style is already the same
133
+ diff_dict = self.differences[this_style_index]
134
+ doc = diff_dict['doc']
135
+ this_style_name = diff_dict['style']
136
+ log = doc.copy_one_style(this_style_name, template_style_name, self.template)
137
+ if log:
138
+ self.log.append({doc.name: log})
139
+
140
+ def update_list_style(self, this_style_index: int, template_style_name: str):
141
+ """
142
+ maps a style from 'this' document into a style from the template
143
+ """
144
+ #dont make any change if the style is already the same
145
+ diff_dict = self.list_differences[this_style_index]
146
+ doc = diff_dict['doc']
147
+ this_style_name = diff_dict['list_style']
148
+ log = doc.change_bullet_style(this_style_name, template_style_name, self.template)
149
+ if log:
150
+ self.log.append({doc.name: log})
151
+
152
+ def update_style(self,index,style_to_modify):
153
+ return self.map_style(index, style_to_modify) if style_to_modify else None
154
+
155
+ def apply_template(self, options_list):
156
+ for new_doc in self.new_docs:
157
+ log = new_doc.apply_template(template=self.template, options_list=options_list)
158
+ if log:
159
+ self.log.append({new_doc.name: log})
160
+
161
+ def reset(self):
162
+ for new_doc in self.new_docs:
163
+ new_doc.delete()
164
+ for gen_doc in self.gen_docs:
165
+ gen_doc.delete()
166
+ self.new_docs = []
167
+ self.gen_docs = []
168
+
169
+
170
+ def get_log(self):
171
+ msg_log = create_msg_from(self.log, self.new_docs)
172
+ return msg_log
173
+
174
+ """
175
+ Source Control
176
+ """
177
+
178
+ def get_or_create_collection(self, id_: str) -> str:
179
+ """
180
+ generates a new id if needed
181
+ TODO: rename into get_or_create_generation_collection
182
+ TODO: have a single DB with separate collections, one for requirements, one for generation
183
+ """
184
+ if id_ != '-1':
185
+ return id_
186
+ else:
187
+ now = datetime.datetime.now().strftime("%m%d%H%M")
188
+ letters = string.ascii_lowercase + string.digits
189
+ id_ = now + '-' + ''.join(random.choice(letters) for _ in range(10))
190
+ semantic_db.get_or_create_collection(id_)
191
+ return id_
192
+
193
+ async def wiki_fetch(self) -> [str]:
194
+ """
195
+ returns the title of the wikipages corresponding to the tasks described in the input text
196
+ """
197
+ all_tasks = []
198
+ for new_doc in self.new_docs:
199
+ all_tasks += new_doc.tasks
200
+ async_tasks = [asyncio.create_task(get_wikilist(task)) for task in all_tasks]
201
+ wiki_lists = await asyncio.gather(*async_tasks)
202
+ flatten_wiki_list = list(set().union(*[set(w) for w in wiki_lists]))
203
+ return flatten_wiki_list
204
+
205
+ async def wiki_upload_and_store(self, wiki_title: str, collection_name: str):
206
+ """
207
+ uploads one wikipage and stores them into the right collection
208
+ """
209
+ wikipage = Wiki().fetch(wiki_title)
210
+ wiki_title = wiki_title
211
+ if type(wikipage) != str:
212
+ texts = WikiPage(wikipage.page_content).get_paragraphs()
213
+ add_texts_to_collection(coll_name=collection_name, texts=texts, file=wiki_title, source='wiki')
214
+ else:
215
+ print(wikipage)
216
+
217
+ """
218
+ Generate Control
219
+ """
220
+
221
+
222
+ async def generate_doc_from_db(self, collection_name: str, from_files: [str]) -> [str]:
223
+
224
+ def query_from_task(task):
225
+ return get_public_paragraph(task)
226
+
227
+ async def retrieve_text_and_generate(t, collection_name: str, from_files: [str]):
228
+ """
229
+ retreives the texts from the database and generates the documents
230
+ """
231
+ # retreive the texts from the database
232
+ task_query = query_from_task(t)
233
+ texts = query_collection(coll_name=collection_name, query=task_query, from_files=from_files)
234
+ task_resolutions = get_private_paragraph(task=t, texts=texts)
235
+ return task_resolutions
236
+
237
+ async def real_doc_generation(new_doc):
238
+ async_task_resolutions = [asyncio.create_task(retrieve_text_and_generate(t=task, collection_name=collection_name, from_files=from_files))
239
+ for task in new_doc.tasks]
240
+ tasks_resolutions = await asyncio.gather(*async_task_resolutions) #A VOIR
241
+ gen_path = f"{self.generated_docs_path}/{new_doc.name}e.docx"
242
+ gen_doc = new_doc.copy(gen_path)
243
+ gen_doc.replace_tasks(tasks_resolutions)
244
+ gen_doc.save_as_docx()
245
+ gen_paths.append(gen_doc.path)
246
+ self.gen_docs.append(gen_doc)
247
+ return gen_paths
248
+
249
+ gen_paths = []
250
+ gen_paths = await asyncio.gather(*[asyncio.create_task(real_doc_generation(new_doc)) for new_doc in self.new_docs])
251
+ gen_paths = [path for sublist in gen_paths for path in sublist]
252
+ gen_paths = list(set(gen_paths))
253
+ return gen_paths
254
+
255
+
256
+
257
+
258
+
259
+
260
+ """
261
+ Requirements
262
+ """
263
+
264
+ def set_input_csv(self, csv_path: str):
265
+ """
266
+ TODO: rename to set_requirements_file
267
+ """
268
+ self.input_csv = csv_path
269
+
270
+ def create_collection(self, docs: [Doc]):
271
+ """
272
+ TODO: rename to create_requirements_collection
273
+ TODO: merge with semantic tool to have only one DB Object
274
+ """
275
+ coll_name = "collection_for_docs"
276
+ collection = self.client_db.get_or_create_collection(coll_name)
277
+ for doc in docs:
278
+ self.fill_collection(doc, collection)
279
+ self.retriever.collection = collection
280
+
281
+ def fill_collection(self, doc: Doc, collection: str):
282
+ """
283
+ fills the collection with the blocks of the documents
284
+ """
285
+ Retriever(doc=doc, collection=collection)
src/domain/block.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
class Block:
    """Atomic piece of document content used for indexing and retrieval.

    A block carries the text of one section (``content``), its position in the
    document hierarchy (``index``, ``rank``, ``level``), an optional French
    translation, a list of ``specials`` (special action lines extracted from
    the text), and a retrieval ``distance`` (lower is closer; defaults to a
    large sentinel when no query has been run).
    """

    def __init__(self, doc: str = '', title: str = '', content: str = '', content_fr: str = '',
                 index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
        self.doc = doc
        self.title = title
        self.title_fr = ""
        self.content = content
        self.content_fr = content_fr
        self.specials = []
        self.index = index
        self.rank = rank
        self.level = level
        self.distance = distance

    def separate_1_block_in_n(self, max_size=4500):
        """Split this block's content into n sub-blocks of roughly equal size.

        n is the smallest number of parts such that each part is at most
        ``max_size`` characters. Sub-blocks inherit doc/rank/level and get
        ``_part{i}`` / ``_{i}`` suffixes on title and index.

        Returns a list of new ``Block`` objects (always at least one).
        """
        content_length = len(self.content)
        # max(1, ...): without it, empty content gives n == 0 and the
        # floor-division below raises ZeroDivisionError.
        n = max(1, math.ceil(content_length / max_size))
        block_size = content_length // n
        new_blocks = []
        for i in range(n):
            start = i * block_size
            # The last part takes the remainder (slice to the end).
            end = (i + 1) * block_size if i < n - 1 else None
            new_blocks.append(Block(doc=self.doc,
                                    title=self.title + f"_part{i}",
                                    content=self.content[start:end],
                                    index=self.index + f"_{i}",
                                    rank=self.rank,
                                    level=self.level))
        return new_blocks

    def to_dict(self) -> {}:
        """Serialize the block to a flat dict (suitable for a vector-DB payload).

        ``specials`` is flattened into ``special_0`` .. ``special_{k-1}`` keys
        plus a ``specials_len`` count, mirroring ``from_dict``.
        """
        block_dict = {'doc': self.doc,
                      'title': self.title,
                      'title_fr': self.title_fr,
                      'content': self.content,
                      'content_fr': self.content_fr,
                      'index': self.index,
                      'rank': self.rank,
                      'level': self.level,
                      'distance': self.distance}
        for i, s in enumerate(self.specials):
            special_key = 'special_' + str(i)
            block_dict[special_key] = s
        block_dict['specials_len'] = len(self.specials)
        return block_dict

    def from_dict(self, block_dict: {}):
        """Populate this block in place from a dict produced by ``to_dict``.

        Returns ``self`` to allow ``Block().from_dict(d)`` chaining.
        Raises ``KeyError`` if a required key is missing.
        """
        self.doc = block_dict['doc']
        self.title = block_dict['title']
        self.title_fr = block_dict['title_fr']
        self.content = block_dict['content']
        self.content_fr = block_dict['content_fr']
        self.index = block_dict['index']
        self.rank = block_dict['rank']
        self.level = block_dict['level']
        self.distance = block_dict['distance']
        self.specials = []
        for i in range(block_dict['specials_len']):
            special_key = 'special_' + str(i)
            self.specials.append(block_dict[special_key])
        return self

    @property
    def distance_str(self) -> str:
        """Distance formatted with two decimals, for display."""
        return format(self.distance, '.2f')
src/domain/container.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.domain.paragraph import Paragraph
2
+ from src.domain.block import Block
3
+
4
+ INFINITE = 10000
5
+
6
+
7
class Container:
    """Recursive tree node over a flat list of Paragraph objects.

    A container owns the paragraphs that precede its first sub-heading
    (``self.paragraphs``) and one child Container per sub-heading
    (``self.children``). ``containers`` flattens the whole subtree,
    ``blocks`` collects the indexable Block objects.
    """

    def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
                 father=None, id_=0):
        """
        should add some summary or infos on content (by a priori generation)
        """
        if index is None:
            index = []
        self.level = level
        # NOTE(review): dead branch — kept as in the original.
        if not self.level:
            pass
        self.title = title
        self.paragraphs = []            # paragraphs directly attached to this node
        self.all_paragraphs = paragraphs
        self.children = []
        self.index = index
        self.father = father  # if not father, then the container is at the top of the hierarchy
        # id is built by string concatenation: '1' + father id + local id.
        self.id_ = int(str(1) + str(father.id_) + str(id_))
        if paragraphs:
            # copy(): create_children consumes its list with pop(0).
            self.paragraphs, self.children = self.create_children(paragraphs.copy(), level, index)
        # Flatten the subtree, depth-first, self first.
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.blocks = self.get_blocks()
        # Split attached paragraph text by paragraph type (title text unused).
        self.normal, self.comment, self.task, _ = self.sort_paragraphs()

        self.one_liner = (self.title.text if self.title else '') + ' ' + self.comment
        self.root_text = self.one_liner + ' ' + self.normal


    @property
    def text(self):
        # Full recursive text of the subtree, with "Titre <level> :" markers.
        text = ""
        if self.title:
            text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
        for p in self.paragraphs:
            text += p.text + '\n'
        for child in self.children:
            text += child.text
        return text

    @property
    def table_of_contents(self):
        """
        Not used
        """
        toc = []
        if self.title:
            toc += [{str(self.level): self.title.text}]
        if self.children:
            for child in self.children:
                toc += child.table_of_contents
        return toc

    def move(self, position: int, new_father=None):
        """
        Not used
        """
        current_father = self.father
        current_father.children.remove(self)

        self.rank = new_father.rank + 1 if new_father else 0
        self.father = new_father
        if position < len(new_father.children):
            new_father.children.insert(position, self)
        else:
            new_father.children.append(self)

    def create_children(self, paragraphs, level, rank) -> ([], []):
        """
        creates children containers or directly attached content
        and returns the list of containers and contents of level+1
        :return:
        [Content or Container]
        """
        attached_paragraphs = []
        container_paragraphs = []
        container_title = None
        children = []
        in_children = False
        # NOTE: the 'level' parameter is immediately overwritten — the loop
        # tracks the current heading level locally, starting "infinitely deep".
        level = INFINITE
        child_id = 0

        while paragraphs:
            p = paragraphs.pop(0)
            if not in_children and not p.is_structure:
                # Before the first heading: paragraph belongs to this node.
                attached_paragraphs.append(p)
            else:
                in_children = True
                if p.is_structure and p.level <= level:  # if p is higher or equal in hierarchy
                    # Close the child being accumulated and start a new one.
                    if container_paragraphs or container_title:
                        children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
                        child_id += 1
                    container_paragraphs = []
                    container_title = p
                    level = p.level

                else:  # p is strictly lower in hierarchy
                    container_paragraphs.append(p)

        # Flush the last accumulated child, if any.
        if container_paragraphs or container_title:
            children.append(Container(container_paragraphs, container_title, level, rank, self, child_id))
            child_id += 1

        return attached_paragraphs, children

    @property
    def structure(self):
        # Flat list of node descriptors (shape used by the front-end tree view).
        self_structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'title': self.title.text if self.title else 'root'
        }}
        paragraphs_structure = [p.structure for p in self.paragraphs]
        structure = [self_structure] + paragraphs_structure
        for child in self.children:
            structure += child.structure
        return structure

    def get_lang(self):
        """
        returns the main language of the document (not implemented)
        :return:
        """

    def get_structure(self, level=2):
        """
        returns the structure of the document (not implemented)
        :return:
        """

    def create_embeddings(self):
        """
        (not implemented)
        :return:
        """

    def get_blocks(self):
        # Build one Block from this node's own paragraphs, then recurse.
        block = Block(level=self.level, index=self.index)
        if self.title:
            block.title = self.title.text
        for p in self.paragraphs:
            if not p.blank:
                if p.text.startswith('##### '):
                    # '##### ' marks a special action line, stored separately.
                    # NOTE(review): lstrip strips characters, not the prefix —
                    # it also eats leading '#' and '[space]' in the payload.
                    special_action = p.text.lstrip('##### ')
                    block.specials.append(special_action)
                else:
                    block.content += p.text
        # Skip empty blocks.
        blocks = [block] if block.content or block.specials else []
        for child in self.children:
            blocks += child.blocks
        return blocks

    def get_fulltask(self, doc_one_liner):
        """Build the context dict for a task: the node's own description plus
        the one-liners of its father and of the siblings before/after it."""
        index = 0
        siblings_ = []
        if isinstance(self.father, Container):
            siblings_ = self.father.children.copy()
            index = siblings_.index(self)
        siblings_before_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if idx < index]
        siblings_after_context = [sibling.one_liner for idx, sibling in enumerate(siblings_) if index < idx]

        fulltask = {'description': self.task,
                    'about': self.one_liner,
                    'doc_description': doc_one_liner,
                    'above': self.father.one_liner if isinstance(self.father, Container) else '',
                    'before': siblings_before_context,
                    'after': siblings_after_context}
        return fulltask

    def sort_paragraphs(self) -> (str, str, str, str):
        # Concatenate attached paragraph text per paragraph type.
        mapping = {'normal': '', 'comment': '', 'task': '', 'title': ''}
        for p in self.paragraphs:
            mapping[p.type] += ' ' + p.parsed_text
        return mapping['normal'], mapping['comment'], mapping['task'], mapping['title']

    def get_all_styles_used_in_doc_except_list(self):
        """
        loop in doc? rather than in container? (since it applies only to container of level 0)
        """
        styles = []
        for p in self.all_paragraphs:
            styles.append(p.get_styles_in_paragraph_except_list())
        res = []
        # flatten the list, then deduplicate by style name (first wins)
        temp = [item for sublist in styles for item in sublist]
        names = [style.name for style in temp]
        for s in temp:
            if s.name in names:
                res.append(s)
                names.remove(s.name)
        return res

    def get_list_styles(self):
        # Union of the list styles found in every paragraph of the subtree.
        styles = []
        for p in self.all_paragraphs:
            styles.append(p.get_list_styles())
        res = list(set().union(*styles))
        return res

    def retrieve_number_of_misapplied_styles(self):
        # Count paragraphs flagged as having a misapplied style.
        res = 0
        for p in self.all_paragraphs:
            if p.style_misapplied:
                res += 1
        return res
src/domain/container_requirements.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.domain.paragraph import Paragraph
2
+ from src.domain.block import Block
3
+
4
+ INFINITE = 10000
5
+
6
+
7
class Container_requirements:
    """Variant of Container used for requirements extraction.

    Same recursive construction over Paragraph objects, but blocks embed the
    full heading path ("Title1/Title2/... :") in their content so each
    requirement carries its hierarchical context.
    """

    def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
                 father=None, id_=0):
        if index is None:
            index = []
        self.level = level
        # NOTE(review): dead branch — kept as in the original.
        if not self.level:
            pass
        self.title = title
        self.paragraphs = []            # paragraphs directly attached to this node
        self.all_paragraphs = paragraphs
        self.children = []
        self.index = index
        self.father = father  # if not father, then the container is at the top of the hierarchy
        # id is built by string concatenation: '1' + father id + local id.
        self.id_ = int(str(1) + str(father.id_) + str(id_))
        if paragraphs:
            # copy(): create_children consumes its list with pop(0).
            self.paragraphs, self.children = self.create_children(paragraphs.copy(), level, index)
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        self.blocks = self.get_blocks_requirements()


    @property
    def text(self):
        # Full recursive text of the subtree, with "Titre <level> :" markers.
        text = ""
        if self.title:
            text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
        for p in self.paragraphs:
            text += p.text + '\n'
        for child in self.children:
            text += child.text
        return text


    def move(self, position: int, new_father=None):
        # Re-parent this node at the given position among new_father's children.
        current_father = self.father  # should be added in the domain
        current_father.children.remove(self)

        self.rank = new_father.rank + 1 if new_father else 0
        self.father = new_father
        if position < len(new_father.children):
            new_father.children.insert(position, self)
        else:
            new_father.children.append(self)

    def create_children(self, paragraphs, level, rank) -> ([], []):
        """
        creates children containers or directly attached content
        and returns the list of containers and contents of level+1
        :return:
        [Content or Container]
        """
        attached_paragraphs = []
        container_paragraphs = []
        container_title = None
        children = []
        in_children = False
        # NOTE: 'level' parameter is overwritten — the loop tracks the current
        # heading level locally, starting "infinitely deep".
        level = INFINITE
        child_id = 0

        while paragraphs:
            p = paragraphs.pop(0)
            if not in_children and not p.is_structure:
                # Before the first heading: paragraph belongs to this node.
                attached_paragraphs.append(p)
            else:
                in_children = True
                if p.is_structure and p.level <= level:  # if p is higher or equal in hierarchy
                    # Close the child being accumulated and start a new one.
                    if container_paragraphs or container_title:
                        children.append(Container_requirements(container_paragraphs, container_title, level, rank, self, child_id))
                        child_id += 1
                    container_paragraphs = []
                    container_title = p
                    level = p.level

                else:  # p is strictly lower in hierarchy
                    container_paragraphs.append(p)

        # Flush the last accumulated child, if any.
        if container_paragraphs or container_title:
            children.append(Container_requirements(container_paragraphs, container_title, level, rank, self, child_id))
            child_id += 1

        return attached_paragraphs, children

    @property
    def structure(self):
        # Flat list of node descriptors (shape used by the front-end tree view).
        self_structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'title': self.title.text if self.title else 'root'
        }}
        paragraphs_structure = [p.structure for p in self.paragraphs]
        structure = [self_structure] + paragraphs_structure
        for child in self.children:
            structure += child.structure
        return structure

    def get_blocks_requirements(self):
        """Build one Block per non-empty node, prefixing its content with the
        full ancestor heading path ("A/B/C :")."""
        block = Block(level=self.level, index=self.index)
        if self.title:
            # WARNING: mutates the Paragraph titles in place (strips newlines)
            # while walking up the father chain.
            self.title.text = self.title.text.replace('\r', '').replace('\n', '')
            block.title = self.title.text
            block.content = self.title.text + '/'
            temp_father = self.father
            while temp_father and type(temp_father) == Container_requirements:
                if temp_father.title:
                    temp_father.title.text = temp_father.title.text.replace('\r', '').replace('\n', '')
                    block.content = temp_father.title.text + '/' + block.content
                temp_father = temp_father.father
            block.content += " :\n\n"
        # i flags whether at least one non-blank paragraph was found.
        i = 0
        for p in self.paragraphs:
            if not p.blank:
                i = 1
                if p.text.startswith('##### '):
                    # '##### ' marks a special action line, stored separately.
                    special_action = p.text.lstrip('##### ')
                    block.specials.append(special_action)
                else:
                    block.content += p.text
        if i == 0:
            # No real content: drop the block (keep only children's blocks).
            blocks = []
        else:
            blocks = [block]
        for child in self.children:
            blocks += child.blocks
        return blocks
140
+
src/domain/doc.py ADDED
@@ -0,0 +1,473 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import docx
2
+ from src.tools.doc_tools import *
3
+ from docxcompose.composer import Composer
4
+ from docx import Document as Document_compose
5
+ from docx.enum.table import WD_TABLE_ALIGNMENT
6
+ from src.domain.container import Container
7
+ from src.domain.container_requirements import Container_requirements
8
+ from src.domain.paragraph import Paragraph
9
+ from src.domain.styles import Styles
10
+ import shutil
11
+ import os
12
+ from docx.oxml.ns import qn
13
+ from docx.oxml.shared import OxmlElement
14
+ from docx.shared import Inches
15
+ from src.tools.pretty_print import pretty_print_block_and_indexes, pretty_print_paragraphs
16
+ from src.tools.index_creation import set_indexes
17
+ from src.reader.reader_for_requirements import WordReader
18
+
19
+ class Doc:
20
+
21
+ """
22
+ TODO: mettre _ devant les méthodes internes
23
+ """
24
+
25
+ def __init__(self, path='', id_=None):
26
+ self.xdoc = docx.Document(path)
27
+ self.title = get_title(path)
28
+ self.name = self.title.split('.')[0]
29
+ self.id_ = id(self)
30
+ self.path = path
31
+ self.paragraphs = [Paragraph(xp, self.id_, i, self) for (i, xp) in enumerate(self.xdoc.paragraphs)]
32
+ self.requirements_paragraphs = WordReader(self.path).paragraphs if not "data/templates" in self.path else []
33
+ self.handle_content_before_toc()
34
+ self.container = Container(self.paragraphs, father=self)
35
+ self.container_requirements = Container_requirements(self.requirements_paragraphs, father=self)
36
+ set_indexes(self.container, self.path)
37
+ set_indexes(self.container_requirements, self.path)
38
+ self.styles = Styles(self.xdoc.styles)
39
+ self.tasks = [c.get_fulltask(self.container.one_liner) for c in self.container.containers if c.task]
40
+ self.blocks = self.get_blocks()
41
+ self.blocks_requirements = self.get_blocks_requirements()
42
+
43
+
44
+ def copy(self, new_doc_path):
45
+ shutil.copyfile(self.path, new_doc_path)
46
+ new_doc = Doc(new_doc_path)
47
+ new_doc.save_as_docx(new_doc_path)
48
+ return new_doc
49
+
50
+ def clear(self):
51
+ os.remove(self.path)
52
+
53
+ def apply_template(self, template, options_list):
54
+ """
55
+ TODO: mettre le texte dans un fichier de config
56
+ """
57
+ log = []
58
+ j = 0
59
+ if ("Justifier le texte (Normal)" in options_list):
60
+ log.append("Le contenu du document a été justifié")
61
+ self.justify_content()
62
+ self.save_as_docx()
63
+ if("Recentrer les tableaux" in options_list):
64
+ j = self.center_tables()
65
+ log.append(f"{j} table{'s' if j>1 else ''} centrée{'s' if j>1 else ''}")
66
+ self.save_as_docx()
67
+ log.append(f"Le template {template.name} a été ajouté avant le document")
68
+ self.rearrange_tables()
69
+ self.save_as_docx()
70
+ log = self.styles.apply_from(template.styles, log)
71
+ self.save_as_docx()
72
+ self.delete_toc(template)
73
+ self.normal_style_for_empty_paragraphs()
74
+ self.save_as_docx()
75
+ self.append_doc_to_template_and_update_toc(template)
76
+ return log
77
+
78
+ def copy_one_style(self, src_style_name: str, dest_style_name: str, template):
79
+ style_dest = template.styles.get_style_from_name(dest_style_name)
80
+ src_style = self.styles.get_style_from_name(src_style_name)
81
+ if src_style:
82
+ log = self.styles.copy_one_style(src_style, style_dest)
83
+ return log
84
+ else:
85
+ return None
86
+
87
+ def get_different_styles_with_template(self, template):
88
+ styles_used_in_doc = self.get_all_styles_used_in_doc_except_list()
89
+ different_styles = get_difference_with_template(styles_used_in_doc, template)
90
+ return different_styles
91
+
92
+ def save_as_docx(self, path: str = ''):
93
+ path = path if path else self.path
94
+ self.path = path
95
+ self.xdoc.save(path)
96
+
97
+ def get_blocks(self):
98
+
99
+ """
100
+ TODO: do a function that determines if the Doc is not a template nor a generated doc
101
+ TODO: merge the two functions for getting blocks
102
+ TODO: why do we need two functions? in the end, we need only
103
+ """
104
+ if "temp/generated_files" in self.path or "data/templates" in self.path:
105
+ return
106
+
107
+ def from_list_to_str(index_list):
108
+ index_str = str(index_list[0])
109
+ for el in index_list[1:]:
110
+ index_str += '.' + str(el)
111
+ return index_str
112
+
113
+ blocks = self.container.blocks
114
+ for block in blocks:
115
+ block.doc = self.title
116
+ block.index = from_list_to_str(block.index)
117
+ return blocks
118
+
119
+
120
+ def get_blocks_requirements(self):
121
+ if "temp/generated_files" in self.path or "data/templates" in self.path:
122
+ return
123
+
124
+ def from_list_to_str(index_list):
125
+ index_str = str(index_list[0])
126
+ for el in index_list[1:]:
127
+ index_str += '.' + str(el)
128
+ return index_str
129
+
130
+ blocks = self.container_requirements.blocks
131
+ for block in blocks:
132
+ block.doc = self.title
133
+ block.index = from_list_to_str(block.index) if not isinstance(block.index, str) else block.index
134
+ # print(f"{block.index}: {block.content}")
135
+ # print("--------------------------------------------------")
136
+ return blocks
137
+
138
+ @property
139
+ def toc(self):
140
+ """
141
+ return the paragraphs that are in the table of contents
142
+ """
143
+ return [p for p in self.paragraphs if p.toc]
144
+
145
    @property
    def structure(self):
        """Hierarchical structure dict as built by the container."""
        return self.container.structure
148
+
149
    def replace_tasks(self, resolutions: list):
        """Replace each 'task' paragraph's text with the matching resolution.

        Counts must match; otherwise nothing is changed and a warning is
        printed (the original marks this as an exception to be handled).
        Returns self for chaining.
        """
        if len(resolutions) == len(self.tasks):  # exception to be handled
            p_tasks = [p for p in self.paragraphs if p.type == 'task']
            for p, r in zip(p_tasks, resolutions):
                p.set_text(r)
        else:
            print(f"résolutions : {len(resolutions)} != {len(self.tasks)} tasks")
        return self
157
+
158
    def get_paragraphs(self):
        """All paragraphs collected recursively by the container."""
        return self.container.all_paragraphs
160
+
161
    def get_text_from_paragraphs(self):
        """Plain text of every paragraph, in document order."""
        return [p.text for p in self.paragraphs]
163
+
164
    def check_document(self):
        """
        Debugging helper: walk the document's paragraphs and print a marker
        for each picture, the text of non-empty paragraphs, table contents
        (first row taken as headers), or an 'empty paragraph' marker.

        NOTE(review): pictures/tables are counted independently of the
        paragraph actually holding them, so the printed order is approximate.
        """
        picCount = 0
        tabCount = 0
        for paragraph in self.xdoc.paragraphs:
            if picCount < len(self.xdoc.inline_shapes):
                print('\033[1mPicture \033[0m')
                picCount += 1
            elif paragraph.text:
                print(paragraph.text)
            elif tabCount < len(self.xdoc.tables):
                table = self.xdoc.tables[tabCount]
                data = []
                keys = None
                for i, row in enumerate(table.rows):
                    text = (cell.text for cell in row.cells)
                    if i == 0:
                        # first row becomes the dict keys for every later row
                        keys = tuple(text)
                        continue
                    row_data = dict(zip(keys, text))
                    data.append(row_data)
                print('\033[1mTable:\033[0m', data)
                tabCount += 1
            else:
                print('\033[1mEmpty paragraph\033[0m')
191
+
192
+
193
+ def center_tables(self):
194
+ j = 0
195
+ for table in self.xdoc.tables:
196
+ j += 1
197
+ table.alignment = WD_TABLE_ALIGNMENT.CENTER
198
+ return j
199
+
200
    def rearrange_tables(self):
        """
        Hotfix for table autofit: python-docx's autofit flags alone do not
        rewrite existing w:tblW / w:tcW entries, so force every table and
        every cell width to 'auto' directly in the XML.
        """
        for t_idx, _ in enumerate(self.xdoc.tables):
            self.xdoc.tables[t_idx].autofit = True
            self.xdoc.tables[t_idx].allow_autofit = True
            # table-level preferred width -> type="auto" (namespaced attribute)
            self.xdoc.tables[t_idx]._tblPr.xpath("./w:tblW")[0].attrib["{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type"] = "auto"
            for row_idx, _ in enumerate(self.xdoc.tables[t_idx].rows):
                for cell_idx, _ in enumerate(self.xdoc.tables[t_idx].rows[row_idx].cells):
                    # cell-level width: auto, with the fixed width zeroed out
                    self.xdoc.tables[t_idx].rows[row_idx].cells[cell_idx]._tc.tcPr.tcW.type = 'auto'
                    self.xdoc.tables[t_idx].rows[row_idx].cells[cell_idx]._tc.tcPr.tcW.w = 0
213
+
214
+ def center_images(self):
215
+ """
216
+ works only for images in the run
217
+ """
218
+ for paragraph in self.paragraphs:
219
+ paragraph.center_paragraph()
220
+
221
+ def justify_content(self):
222
+ """
223
+ applied only to normal style
224
+ """
225
+ for paragraph in self.paragraphs:
226
+ paragraph.justify_paragraph()
227
+
228
+ def number_images_in_doc(self):
229
+ """
230
+ for debug = not used
231
+ """
232
+ picCount = 0
233
+ for _ in self.xdoc.paragraphs:
234
+ if picCount < len(self.xdoc.inline_shapes):
235
+ print('\033[1mPicture \033[0m')
236
+ picCount += 1
237
+ return picCount
238
+
239
    def get_all_styles_used_in_doc_except_list(self):
        """Delegate to the container: styles used in the doc, list styles excluded."""
        return self.container.get_all_styles_used_in_doc_except_list()
241
+
242
    def get_list_styles(self):
        """Delegate to the container: list styles with indentation labels."""
        return self.container.get_list_styles()
244
+
245
    def retrieve_number_of_misapplied_styles(self):
        """Delegate to the container: count of paragraphs whose effective
        formatting diverges from their declared style."""
        return self.container.retrieve_number_of_misapplied_styles()
247
+
248
+ def normal_style_for_empty_paragraphs(self):
249
+ for p in self.paragraphs:
250
+ if p.blank and not p.toc:
251
+ p.set_style(self.styles.get_style_from_name("Normal"))
252
+ self.save_as_docx()
253
+
254
+
255
    def append_doc_to_template_and_update_toc(self, template):
        """
        Append this document's content after *template*'s content (via
        docxcompose), overwrite self.path with the merged file, then reload
        it and refresh its table of contents.

        Kept TODO from the original: rename Document_compose into XDocument.
        Document_compose = plain old Document from docx;
        Composer = from docxcompose => allows merging several documents.
        """
        master = Document_compose(template.path)
        composer = Composer(master)
        doc = Document_compose(self.path)
        composer.append(doc)
        composer.save(self.path)
        # reload the merged file so the ToC update sees the final content
        new_doc = Doc(self.path)
        update_table_of_contents(new_doc.xdoc)
        new_doc.save_as_docx()
269
+
270
    def delete_content_before_toc(self):
        """
        Delete every empty or non-ToC paragraph that precedes the table of
        contents, then save. No-op when the document has no ToC.

        NOTE(review): the xdoc paragraph sequence is mutated while being
        iterated, and self.paragraphs.pop(0) assumes both lists stay aligned
        — fragile; the original TODO asks for a rewrite over our own
        Paragraph objects.
        """
        if self.contains_toc():
            for line in self.xdoc.paragraphs:
                if "toc" in line.style.name:
                    # reached the ToC: everything after it is kept
                    break
                if len(line.text) == 0:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
                    continue
                if 'toc' not in line.style.name:
                    self.delete_paragraph(line)
                    self.paragraphs.pop(0)
            self.save_as_docx()
286
+
287
+ def delete_paragraph(self, paragraph):
288
+ """
289
+ TODO: to be put in paragraph
290
+ """
291
+ p = paragraph._element
292
+ p.getparent().remove(p)
293
+ paragraph._p = paragraph._element = None
294
+
295
+ def delete_toc(self,template):
296
+ """
297
+ TODO: loop with paragraph (ours)
298
+ """
299
+ index_to_insert = None
300
+ for index, p in enumerate(template.paragraphs):
301
+ index_to_insert = index
302
+ if ("table des matières" or "table of contents") in p.text.lower():
303
+ index_to_insert += 1
304
+ break
305
+ xparagraphs_toc = [p.xparagraph for p in self.toc]
306
+ for p in xparagraphs_toc:
307
+ self.delete_paragraph(p)
308
+ self.paragraphs.pop(0)
309
+ self.save_as_docx()
310
+
311
+
312
    def insert_table_of_content(self, index):
        """
        Insert a Word TOC field before paragraph *index* (not used here).
        Builds the w:fldChar / w:instrText run elements by hand; Word renders
        the field when it updates fields.
        """
        paragraph = self.xdoc.paragraphs[index].insert_paragraph_before("", "Normal")
        paragraph.paragraph_format.space_before = Inches(0)
        paragraph.paragraph_format.space_after = Inches(0)
        run = paragraph.add_run()

        fldChar = OxmlElement('w:fldChar')  # field start marker
        fldChar.set(qn('w:fldCharType'), 'begin')

        instrText = OxmlElement('w:instrText')
        instrText.set(qn('xml:space'), 'preserve')
        instrText.text = 'TOC \\o "1-5" \\h \\z \\u'  # adjust "1-5" to the heading depth you need

        fldChar2 = OxmlElement('w:fldChar')
        fldChar2.set(qn('w:fldCharType'), 'separate')

        fldChar3 = OxmlElement('w:t')
        fldChar3.text = "Right-click to update field."
        # NOTE(review): fldChar3 is rebound on the next line, so the w:t
        # placeholder above is discarded and never appended — confirm intent.
        fldChar3 = OxmlElement('w:updateFields')
        fldChar3.set(qn('w:val'), 'true')
        fldChar2.append(fldChar3)

        fldChar4 = OxmlElement('w:fldChar')  # field end marker
        fldChar4.set(qn('w:fldCharType'), 'end')

        r_element = run._r
        r_element.append(fldChar)
        r_element.append(instrText)
        r_element.append(fldChar2)
        r_element.append(fldChar4)

        p_element = paragraph._p
        print(p_element.xml)  # debug output
348
+
349
+
350
+ def contains_toc(self):
351
+ body_elements = self.xdoc._body._body
352
+ #extract those wrapped in <w:r> tag
353
+ rs = body_elements.xpath('.//w:r')
354
+ #check if style is hyperlink (toc)
355
+ table_of_content = []
356
+ for r in rs:
357
+ if r.style:
358
+ if "hyperlink" in r.style.lower() or "lienhypertexte" in r.style.lower():
359
+ table_of_content.append(r.text)
360
+ if len(table_of_content) > 0:
361
+ return True
362
+ else:
363
+ return False
364
+
365
+ def handle_content_before_toc(self):
366
+ """
367
+ TODO: use a function to determine the type of the doc
368
+ """
369
+ if not "data/templates" in self.path and not "temp/generated_files" in self.path: #PREMIER PROBLEME
370
+ self.delete_content_before_toc()
371
+
372
+
373
    def delete_style(self, style_name):
        """Remove the named style from the document's style table and save."""
        self.styles.delete_style(style_name)
        self.save_as_docx()
376
+
377
    def change_bullet_style(self, style_name, template_style_name, template) -> dict:
        """
        Re-style every list paragraph matching *style_name* (format
        "<name> : indentation = <level>") to the template's
        *template_style_name*: each match is deleted and re-inserted at the
        same position with the new style; tables are then processed and the
        doc saved. Returns the log dict from the table pass.

        real_style_name = core style name with no indentation
        level = indentation level
        TODO kept from the original: recode to respect the OOP.
        """
        i = 0
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        while i < len(self.xdoc.paragraphs):
            para = self.xdoc.paragraphs[i]
            if real_style_name == para.style.name and self.paragraphs[i].is_list and self.paragraphs[i].list_indentation == level:
                self.delete_paragraph(self.xdoc.paragraphs[i])
                self.paragraphs.pop(i)
                if i == len(self.xdoc.paragraphs):
                    # deleted paragraph was the last one: append at the end
                    paragraph_inserted = self.xdoc.add_paragraph(para.text, style=template.styles.get_style_from_name(template_style_name))
                    self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
                else:
                    # re-insert before the paragraph that slid into slot i
                    paragraph_inserted = self.xdoc.paragraphs[i].insert_paragraph_before(para.text, style=template.styles.get_style_from_name(template_style_name))
                    self.paragraphs.insert(i, Paragraph(paragraph_inserted, self.id_, i, self))
            i += 1
        log_dict = self.change_bullet_style_in_tables(style_name, template_style_name, template)
        self.save_as_docx()
        return log_dict
403
+
404
    def change_bullet_style_in_tables(self, style_name, template_style_name, template) -> dict:
        """
        Same operation as change_bullet_style, applied to every paragraph
        inside every table cell. Returns a one-entry log dict.

        TODO kept from the original: share the implementation with
        change_bullet_style instead of duplicating it.
        """
        i = 0
        real_style_name = style_name.split(' : ')[0]
        level = int(style_name.split(' = ')[1])
        for table in self.xdoc.tables:
            for row in table.rows:
                for cell in row.cells:
                    i = 0
                    for para in cell.paragraphs:
                        real_para = Paragraph(para, self.id_, i, self)
                        if real_style_name == para.style.name and real_para.is_list and real_para.list_indentation == level:
                            self.delete_paragraph(para)
                            if i == len(cell.paragraphs):
                                cell.add_paragraph(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                            else:
                                cell.paragraphs[i].insert_paragraph_before(real_para.text, style=template.styles.get_style_from_name(template_style_name))
                        i += 1
        log = f"Le style {style_name} a été changé en {template_style_name}"
        log_dict = {'list_mapping': log}
        return log_dict
428
+
429
    def table_insertion(self, paragraph: Paragraph, content: dict):
        """
        Build a table from *content*, move it right after *paragraph*,
        re-apply auto-fit, save, and return the inserted table.

        Expected format (note the key is "headers", plural — the original
        comment said "header"):
        content = {
            "headers": ["header1", "header2", "header3"],
            "rows": [
                ["row1", "row1", "row1"],
                ["row2", "row2", "row2"],
            ]
        }
        """
        # one extra row for the header line; default 'Normal table' style
        self.xdoc.add_table(rows = len(content["rows"]) + 1, cols = len(content["headers"]))
        table = self.xdoc.tables[-1]
        # header row
        for i, header in enumerate(content["headers"]):
            table.cell(0, i).text = header
        # data rows
        for i, row in enumerate(content["rows"]):
            for j, cell in enumerate(row):
                table.cell(i+1, j).text = cell
        # tables are appended at the end of the doc; move into place
        self.move_table_after(table, paragraph.xparagraph)
        self.rearrange_tables()
        self.save_as_docx()
        return table
455
+
456
    def delete_table(self, table):
        """Detach *table* from the XML tree, neutralise the wrapper, and save.

        NOTE(review): `table._row` is not a standard python-docx attribute;
        the assignment just creates a throwaway attribute — confirm intent.
        """
        table._element.getparent().remove(table._element)
        table._element = table._row = None
        self.save_as_docx()
460
+
461
    def move_table_after(self, table, paragraph):
        """Move *table*'s XML element so it directly follows *paragraph*."""
        tbl, p = table._tbl, paragraph._p
        p.addnext(tbl)
464
+
465
+
466
    def remove_all_but_last_section(self):
        """
        Drop every paragraph-level section break (w:pPr/w:sectPr) — unused.
        The document's trailing body-level sectPr is not matched by this
        xpath and therefore survives.
        """
        sectPrs = self.xdoc._element.xpath(".//w:pPr/w:sectPr")
        for sectPr in sectPrs:
            print(sectPr)  # debug output
            sectPr.getparent().remove(sectPr)
src/domain/paragraph.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import string
2
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
3
+ from src.tools.paragraph_tools import find_list_indentation_level
4
+
5
+
6
INFINITE = 10000  # sentinel level for anything that is not a numbered heading


class Paragraph:
    """Wrapper around a python-docx paragraph carrying doc-level metadata:
    heading level, list detection, and a parsed task/comment/normal type."""

    def __init__(self, xparagraph, doc_id: int, id_: int, doc):
        self.doc = doc
        self.xparagraph = xparagraph
        # template paragraphs skip list detection below
        self.is_template_para = "data/templates" in self.doc.path
        # ids are namespaced: leading '2', then doc id, then paragraph id
        self.id_ = int(str(2) + str(doc_id) + str(id_))
        self.style_name = self.xparagraph.style.name
        self.is_list, self.list_indentation = find_list_indentation_level(self.xparagraph, self.doc) if not self.is_template_para else (False, 0)
        self.level = self.get_level_from_name(self.style_name)
        self.is_structure = self.level < INFINITE
        self.text = self.xparagraph.text
        self.type, self.parsed_text = self.parse_text()

    @property
    def style_misapplied(self):
        """
        Compare the first run's effective font with the declared style's font
        (function bugged, not used — kept from the original).
        """
        first_run_style = [run.style.font for run in self.xparagraph.runs]
        first_run_style = first_run_style[0] if first_run_style else None
        if not first_run_style:
            return False
        doc_style = self.doc.styles.get_style_from_name(self.style_name)
        if first_run_style.size != doc_style.font.size:
            return True
        if first_run_style.name != doc_style.font.name:
            return True
        if first_run_style.bold != doc_style.font.bold:
            return True
        if first_run_style.italic != doc_style.font.italic:
            return True
        if first_run_style.underline != doc_style.font.underline:
            return True
        if first_run_style.all_caps != doc_style.font.all_caps:
            return True
        if first_run_style.color.rgb != doc_style.font.color.rgb:
            return True
        return False

    @property
    def structure(self):
        """One-node structure dict keyed by the paragraph id (tree-widget shape)."""
        structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': False,
            'children': [],
            'title': self.text,
            'canRename': True,
            'data': {},
            'level': self.level,
        }}
        return structure

    @property
    def blank(self):
        """
        True when the paragraph carries no signal: its text (newlines removed)
        contains no ASCII letter at all.
        """
        text = self.text.replace('\n', '')
        return set(text).isdisjoint(string.ascii_letters)

    @property
    def toc(self):
        """True when the paragraph's style marks it as a table-of-contents entry."""
        return "toc" in self.style_name

    @staticmethod
    def get_level_from_name(style_name: str) -> int:
        """Heading depth from a style name ('Titre 3' / 'Heading 3' -> 3);
        INFINITE for anything else."""
        level = INFINITE
        if 'Titre' in style_name or 'Heading' in style_name:
            suffix = style_name[-1]
            try:
                level = int(suffix)
            except ValueError:  # narrowed from a bare except
                pass
        return level

    def parse_text(self) -> tuple:
        """Classify the paragraph: ('structure', text) for headings,
        ('task'|'comment', payload) for '?? '/'++ ' markers, else
        ('normal', text)."""
        if self.is_structure:
            return 'structure', self.text

        startswith = {"?? ": "task", "++ ": "comment"}
        for start in startswith.keys():
            split = self.text.rsplit(start)
            if 1 < len(split):
                return startswith[start], split[1]

        return "normal", self.text

    def set_text(self, text: str):
        """Overwrite both the cached text and the underlying run text."""
        self.text = text
        self.xparagraph.text = text
        return self

    def center_paragraph(self):
        """Center the paragraph, but only when it contains an inline image."""
        if self.contains_image():
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    def justify_paragraph(self):
        """Justify the paragraph, but only for the 'Normal' style."""
        if self.xparagraph.style.name == "Normal":
            self.xparagraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    def contains_image(self) -> bool:
        """True when any run embeds a picture (pic:pic in its XML)."""
        return any("pic:pic" in run.element.xml for run in self.xparagraph.runs)

    def get_styles_in_paragraph_except_list(self):
        """Paragraph style (unless this is a list paragraph) plus any run
        style that overrides it."""
        styles = [self.xparagraph.style] if not self.is_list else []
        for run in self.xparagraph.runs:
            if run.style.name != "Default Paragraph Font" and run.style.name != self.xparagraph.style.name:
                styles.append(run.style)
        return styles

    def get_list_styles(self):
        """For list paragraphs, a label combining style name and indentation."""
        styles = []
        if self.is_list:
            styles.append(self.xparagraph.style.name + " : indentation = " + str(self.list_indentation))
        return styles

    def set_style(self, style):
        """Apply *style* to the underlying paragraph; returns self."""
        self.xparagraph.style = style
        return self
138
+
139
+
140
+
src/domain/requirements_paragraphs.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import string
2
+
3
INFINITE = 10000  # sentinel level for anything that is not a numbered heading


class Requirement_Paragraph:
    """A requirements-document paragraph: raw text plus its font style,
    composite id, page id, and derived heading level."""

    def __init__(self, text: str, font_style: str, id_: int, page_id: int):
        self.font_style = font_style
        # composite id: leading '2', then page id, then paragraph id
        self.id_ = int(str(2) + str(page_id) + str(id_))
        self.page_id = page_id
        self.level = self.get_level_from_name(font_style)
        self.is_structure = self.level < INFINITE
        self.text = text

    @property
    def blank(self):
        """
        True when the paragraph carries no signal: its text (newlines removed)
        contains no ASCII letter at all.
        """
        text = self.text.replace('\n', '')
        return set(text).isdisjoint(string.ascii_letters)

    def rearrange_paragraph(self):
        """
        Wrap code/table paragraphs with explicit markers so downstream text
        processing keeps their structure; returns self for chaining.
        """
        if self.font_style == "code":
            self.text = "\n\nCode :```\n" + self.text + "\n```\n\n"
        elif self.font_style == "table":
            self.text = "\n\nTable :\n" + self.text + "\n\n"
        return self

    @staticmethod
    def get_level_from_name(style_name: str) -> int:
        """Heading depth from a style name ('Titre 3' / 'Heading 3' -> 3);
        INFINITE for anything else."""
        level = INFINITE
        if 'Titre' in style_name or 'Heading' in style_name:
            suffix = style_name[-1]
            try:
                level = int(suffix)
            except ValueError:  # narrowed from a bare except
                pass
        return level
src/domain/styles.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from docx.enum.style import WD_STYLE_TYPE
2
+ from docx.shared import RGBColor
3
+ import re
4
+
5
+
6
class Styles:
    """Wrapper around a python-docx style collection with helpers to copy,
    compare, and map styles between a document and a template."""

    def __init__(self, xstyles, doc_id=0, id_=0):
        self.id_ = int(str(doc_id) + str(id_))
        self.xstyles = xstyles
        self.names = [s.name for s in xstyles]

    @staticmethod
    def copy_style(src=None, dest=None) -> set:
        """
        Copy *src*'s attributes onto *dest* (paragraph, list and table styles
        each copy their relevant subset). Returns the set of changed
        attributes; logged pairs are (new value, previous dest value).
        """
        modified_style = set()
        if src.type == WD_STYLE_TYPE.PARAGRAPH:
            # --- colour: compare channel by channel, then overwrite ---
            same_color = True
            if src.font.color.rgb:
                src_rgb = RGBColor(src.font.color.rgb[0], src.font.color.rgb[1], src.font.color.rgb[2])
                if dest.font.color.rgb:
                    for i in range(3):
                        same_color = same_color and dest.font.color.rgb[i] == src_rgb[i]
                else:
                    same_color = False
                dest.font.color.rgb = src_rgb
            else:
                # src has no explicit colour: dest keeps its own, but the
                # difference is still logged
                if dest.font.color.rgb:
                    same_color = False
            if not same_color:
                modified_style.add(('color', True))

            # --- logged attributes. BUG FIX: the original built the log tuple
            # AFTER assigning, so it always recorded (new, new); capture the
            # previous dest value first. ---
            if dest.font.size != src.font.size:
                previous = dest.font.size
                dest.font.size = src.font.size
                modified_style.add(('font size', (src.font.size, previous)))

            if dest.font.name != src.font.name:
                previous = dest.font.name
                dest.font.name = src.font.name
                modified_style.add(('font', (src.font.name, previous)))

            if dest.font.all_caps != src.font.all_caps:
                previous = dest.font.all_caps
                dest.font.all_caps = src.font.all_caps
                modified_style.add(('all_caps', (src.font.all_caps, previous)))

            if dest.font.bold != src.font.bold:
                previous = dest.font.bold
                dest.font.bold = src.font.bold
                modified_style.add(('bold', (src.font.bold, previous)))

            # --- silently copied font/style attributes ---
            dest.font.complex_script = src.font.complex_script
            dest.font.cs_bold = src.font.cs_bold
            dest.font.cs_italic = src.font.cs_italic
            dest.font.double_strike = src.font.double_strike
            dest.font.emboss = src.font.emboss
            dest.font.hidden = src.font.hidden
            dest.font.highlight_color = src.font.highlight_color
            dest.font.imprint = src.font.imprint
            dest.font.italic = src.font.italic
            dest.font.math = src.font.math
            dest.font.no_proof = src.font.no_proof
            dest.font.outline = src.font.outline
            dest.font.rtl = src.font.rtl
            dest.font.shadow = src.font.shadow
            dest.font.small_caps = src.font.small_caps
            dest.font.snap_to_grid = src.font.snap_to_grid
            dest.font.spec_vanish = src.font.spec_vanish
            dest.font.strike = src.font.strike
            dest.font.subscript = src.font.subscript
            dest.font.superscript = src.font.superscript
            dest.font.underline = src.font.underline
            dest.font.web_hidden = src.font.web_hidden
            dest.base_style = src.base_style
            dest.hidden = src.hidden
            dest.locked = src.locked
            dest.name = src.name
            dest.priority = src.priority
            dest.quick_style = src.quick_style
            dest.unhide_when_used = src.unhide_when_used

        if src.type == WD_STYLE_TYPE.LIST:
            dest.hidden = src.hidden
            dest.locked = src.locked
            dest.name = src.name
            dest.priority = src.priority
            dest.quick_style = src.quick_style
            dest.style_id = src.style_id
            dest.unhide_when_used = src.unhide_when_used

        if src.type == WD_STYLE_TYPE.TABLE:
            dest.hidden = src.hidden
            dest.locked = src.locked
            dest.name = src.name
            dest.priority = src.priority
            dest.quick_style = src.quick_style
            dest.unhide_when_used = src.unhide_when_used
        return modified_style

    def apply_from(self, template_styles, options_list):
        """
        Align this collection on *template_styles*: overwrite styles present
        in both, add template styles that are missing. Returns a log dict
        (including 'options_applied' only when options were requested).
        """
        if options_list == []:
            log = {'suppressed_styles': [], 'modified_styles': [], 'added_styles': []}
        else:
            log = {'options_applied': options_list, 'suppressed_styles': [], 'modified_styles': [], 'added_styles': []}

        for s in self.xstyles:
            if s.name in template_styles.names:
                # heading styles may live under '.TitreN' variants in templates
                src_style = template_styles.check_particular_styles(s.name)
                log_s = self.copy_style(src=src_style, dest=s)
                if log_s:
                    log['modified_styles'].append((s.name, log_s))

        for s in template_styles.xstyles:
            if not self.contains_style(s):
                log['added_styles'].append(s.name)
                self.xstyles.add_style(s.name, s.type)
                self.copy_style(src=s, dest=self.xstyles[s.name])
        return log

    def copy_one_style(self, src_style, dest_style) -> dict:
        """
        Map *src_style* (document style) onto *dest_style* (template style):
        the template style's attributes are copied over the document style —
        note the positional call is copy_style(src=dest_style, dest=src_style).
        """
        log_msg = \
            f"le style {src_style.name} a été mappé sur le style {dest_style.name} du template"
        log_dict = {'style_mapping': log_msg}
        self.copy_style(dest_style, src_style)
        return log_dict

    def get_style_from_name(self, name: str):
        """Return the style named *name*, or None when it does not exist."""
        try:
            return self.xstyles[name]
        except KeyError:  # narrowed from a bare except
            return None

    def contains_style(self, style):
        """True when this collection has *style* by name, also trying the
        name without its first character (e.g. a leading '.')."""
        try:
            self.xstyles[style.name]
        except KeyError:
            try:
                self.xstyles[style.name[1:]]
            except KeyError:
                return False
        return True

    def check_particular_styles(self, style_to_transform: str):
        """
        Resolve 'Heading N' / 'Titre N' to the template's '.TitreN' or
        '.Titre N' variants when present; fall back to the literal name.
        """
        original_name = style_to_transform
        if re.search("^Heading [0-9]$", style_to_transform) or re.search("^Titre [0-9]$", style_to_transform):
            style_to_transform = ".Titre" + style_to_transform[-1]
            res = self.get_style_from_name(style_to_transform)
            if res is None:
                style_to_transform = ".Titre " + style_to_transform[-1]
                res = self.get_style_from_name(style_to_transform)
            else:
                return res
            if res:
                return res
            return self.get_style_from_name(original_name)
        return self.get_style_from_name(original_name)

    def delete_style(self, style_name):
        """Delete the named style from the collection and from self.names."""
        self.xstyles[style_name].delete()
        self.names.remove(style_name)
164
+
src/domain/wikidoc.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ the class works but lots of code could be reused
3
+ """
4
+
5
+
6
class Doc:
    """A plain-text document split into Lines and organised into a Container
    tree; for 'input_text' docs the first title line becomes the doc title."""

    def __init__(self, fulltext: str = '', title: str = '', params=None):
        # BUG FIX: the original signature used `params: dict = {}`, a mutable
        # default shared across every call; use a None sentinel instead.
        self.params = {} if params is None else params
        self.lines = [Line(text.strip(), self.params) for text in fulltext.split("\n") if text.strip()]
        self.title, self.lines = self._get_title(title)
        self.container = Container(lines=self.lines, title=self.title, father=self, params=self.params)
        self.fulltext = fulltext

    def _get_title(self, title):
        """For 'input_text' docs, pop the leading title line (or flag it as
        missing); otherwise keep *title* and the lines untouched.
        NOTE: params must contain a 'type' key (KeyError otherwise, as in the
        original)."""
        lines = self.lines
        if self.params['type'] == 'input_text':
            if self.lines and self.lines[0] and self.lines[0].type == 'title':
                title = self.lines[0].text
                lines = lines[1:]
            else:
                title = 'the title is missing'
        return title, lines
23
+
24
+
25
class WikiPage(Doc):
    """A Doc parsed from wikitext: '==' heading markers define the hierarchy
    and boilerplate sections (references, links, ...) are discarded."""

    def __init__(self, fulltext='', title=''):
        self.params = {
            'type': 'wiki',
            # heading markers mapped to their depth (as strings)
            'startswith_':
                {'== ': '1', '=== ': '2', '==== ': '3', '===== ': '4', '====== ': '5', '======= ': '6'},
            # NOTE(review): the last two enders are both ' ======' even though
            # the deepest starter has seven '=' — confirm this is intended.
            'endswith_':
                [' ==', ' ===', ' ====', ' =====', ' ======', ' ======'],

            # section titles dropped from the tree
            'discarded': ["See also", "Notes", "References", "Sources", "External links", "Bibliography",
                          "Cinematic adaptations", "Further reading", "Maps"]
        }
        super().__init__(fulltext=fulltext, title=title, params=self.params)

    def get_paragraphs(self, chunk=500):
        """Flatten the container tree into text paragraphs of ~*chunk* chars."""
        return self.container.get_paragraphs(chunk)
42
+
43
+
44
class Container:
    """One node of the document tree: it holds its own lines plus child
    Containers built from the heading structure of the input lines."""

    def __init__(self, lines=None, level=0, title='', father=None, params=None):
        # BUG FIX: the original defaults were the mutable literals [] and {},
        # shared across every instantiation; use None sentinels instead.
        lines = [] if lines is None else lines
        params = {} if params is None else params

        self.children = []
        self.level = level
        self.title = title
        self.father = father
        self.lines = []
        self._expand(lines)
        if params and 'discarded' in params.keys():
            # drop boilerplate sections by title (top-level call only,
            # since children are built without params)
            self.children = [child for child in self.children if child.title not in params['discarded']]
        self.containers = [self]
        for child in self.children:
            self.containers += child.containers
        # NOTE: text aggregates children only; this node's own lines are
        # not included (kept from the original)
        self.text = ''
        for child in self.children:
            self.text += ' ' + child.text

    def _expand(self, lines):
        """Split *lines* into this node's own lines and one child Container
        per heading found at level self.level + 1."""
        new_child = False
        new_child_lines = []
        new_child_title = []
        for line in lines:
            if not new_child:
                if line.is_structure:
                    # first heading encountered: start accumulating a child
                    new_child = True
                    new_child_lines = []
                    new_child_title = line.text
                    line.level = self.level + 1
                else:
                    self.lines.append(line)
            else:
                if self.level + 1 < line.level or not line.is_structure:
                    # deeper heading or plain text: belongs to the current child
                    new_child_lines.append(line)
                elif self.level + 1 == line.level:
                    # sibling heading: close the current child, start a new one
                    self.children.append(Container(lines=new_child_lines,
                                                   level=self.level + 1,
                                                   title=new_child_title,
                                                   father=self))
                    new_child_lines = []
                    new_child_title = line.text
        if new_child:
            self.children.append(Container(lines=new_child_lines,
                                           level=self.level + 1,
                                           title=new_child_title,
                                           father=self))

    def get_paragraphs(self, chunk=500):
        """Return text paragraphs no longer than *chunk* chars where possible,
        recursing into children for oversized nodes.

        NOTE(review): self.root_text is never assigned anywhere in this class,
        so the long-text branch raises AttributeError — needs a real fix.
        """
        if len(self.text) < chunk:
            paragraphs = [self.text]
        else:
            paragraphs = [self.root_text]
        for child in self.children:
            paragraphs += child.get_paragraphs(chunk)
        return paragraphs
101
+
102
+
103
class Line:
    """One input line plus its parsed type (a heading-depth digit string or
    'normal') and structural level (-1 when the line is not a heading)."""

    def __init__(self, text, params):
        self.text = text
        self.params = params
        self.type, self.text = self._parse_text()
        self.level = int(self.type) if self.type.isdigit() else -1
        self.is_structure = 0 < self.level

    def _parse_text(self):
        """Match the first heading marker from params; return the line's
        (type, stripped text) — ('normal', text) when nothing matches."""
        starters = self.params['startswith_']
        enders = self.params.get('endswith_', [""] * len(starters))
        for position, starter in enumerate(starters.keys()):
            if self.text.startswith(starter):
                stripped = self.text.split(starter)[1]
                ender = enders[position]
                if ender != "":
                    stripped = stripped.split(ender)[0]
                return starters[starter], stripped.strip()
        return 'normal', self.text.strip()
src/llm/llm_tools.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TODO: add a boolean to switch llms
3
+ """
4
+
5
+
6
+ import json
7
+ import string
8
+ import openai
9
+
10
+ import wikipedia
11
+ from langchain.text_splitter import CharacterTextSplitter
12
+ from langchain.prompts import PromptTemplate
13
+ from langchain.chains import LLMChain
14
+ from src.llm.llms import openai_llm
15
+ from src.tools.wiki import Wiki
16
+
17
+
18
+ # async def get_wikilist_open_source(task: {}) -> str:
19
+ # """
20
+ # get the titles of wiki pages interesting for solving the given task
21
+ # """
22
+
23
+ # template = ("<s>[INST] Your task consists in finding the list of wikipedia page titles which provide useful content "
24
+ # " for a paragraph whose description is delimited by triple backticks.\n"
25
+ # " Make sure that you provide no more than 10 elements and that the list is actually finished."
26
+ # " Format your response as a valid JSON list of strings separated by commas.[/INST]</s>"
27
+ # " Description: ```{description}```")
28
+
29
+ # prompt = PromptTemplate(template=template, input_variables=['description'])
30
+ # llm_chain = LLMChain(llm=opensource_llm, prompt=prompt)
31
+ # response = llm_chain.run({'description': task['description']})
32
+ # llm_list = response.choices[0].message.content
33
+ # try:
34
+ # wikilist = json.loads(llm_list)
35
+ # except:
36
+ # print("json loads failed with" + llm_list)
37
+ # wikilist = list(llm_list.split(','))
38
+
39
+ # expanded_wikilist = []
40
+
41
+ # expand_factor = 2
42
+
43
+ # for wikipage in wikilist:
44
+ # expanded_wikilist += wikipedia.search(wikipage, expand_factor)
45
+
46
+ # wikilist = list(set(expanded_wikilist))
47
+
48
+ # return wikilist
49
+
50
+
51
+
52
async def get_wikilist(task: dict) -> list:
    """
    Ask the LLM for wikipedia page titles useful for task['description'],
    parse its JSON-list answer (falling back to comma splitting), then expand
    each title through a wikipedia search and deduplicate.
    """
    llm = openai_llm
    template = (f"\n"
                f" Your task consists in finding the list of wikipedia page titles which provide useful content "
                f" for a paragraph whose description is delimited by triple backticks: ```{task['description']}```\n"
                f" "
                f" Make sure that you provide no more than 10 elements and that the list is actually finished."
                f" Format your response as a valid JSON list of strings separated by commas.\n"
                f" \n"
                f" ")

    llm_list = llm.invoke(template)
    try:
        wikilist = json.loads(llm_list)
    except (json.JSONDecodeError, TypeError):
        # narrowed from a bare except: invalid JSON or a non-string payload
        # falls back to naive comma splitting
        print("json loads failed with" + llm_list)
        wikilist = list(llm_list.split(','))

    # widen the candidate set: search wikipedia around each suggested title
    expanded_wikilist = []
    expand_factor = 2
    for wikipage in wikilist:
        expanded_wikilist += wikipedia.search(wikipage, expand_factor)

    # deduplicate (order is not preserved)
    wikilist = list(set(expanded_wikilist))

    return wikilist
85
+
86
+
87
def extract_list(llm_list: str):
    """
    Best-effort parse of an LLM 'JSON-ish' list string: strip the outer
    brackets, split on double quotes, and keep only fragments that look like
    titles (length > 2 and more than 3/4 ASCII letters). Returns [] on any
    parsing failure.
    """

    def looks_like_title(fragment: str) -> bool:
        if len(fragment) <= 2:
            return False
        usable_length = len([c for c in fragment if c in string.ascii_letters])
        return len(fragment) * 3 / 4 < usable_length

    try:
        wikilist = [el for el in llm_list[1:-1].split('"') if looks_like_title(el)]
        print(wikilist)
    except Exception:  # narrowed from a bare except (no longer eats SystemExit/KeyboardInterrupt)
        wikilist = []
        print('issues with the wikilist')
    return wikilist
103
+
104
+
105
+ # def get_public_paragraph_open_source(task: {}) -> str:
106
+ # """returns the task directly performed by chat GPT"""
107
+
108
+ # template = ("<s>[INST] Your task consists in generating a paragraph whose description is delimited by triple "
109
+ # "backticks.\n"
110
+ # " The paragraph belongs at the top level of the hierarchy to a document"
111
+ # " whose doc_description is delimited by triple backticks.\n"
112
+ # " Make sure that the paragraph relates the top level of the document\n"
113
+ # " The paragraph belongs to a higher paragraph in the hierarchy whose description (above) is delimited by "
114
+ # " triple backticks."
115
+ # " Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
116
+ # " The paragraphs comes after previous paragraphs whose description (before) is delimited by triple "
117
+ # " backticks.\n"
118
+ # " Make sure that the paragraph relates with previous paragraph without any repetition\n"
119
+ # " The paragraphs comes before next paragraphs whose description (after) is delimited by triple backticks.\n"
120
+ # " Make sure that the paragraph prepares the transition to the next paragraph without any "
121
+ # " repetition. [/INST]</s>"
122
+ # " Description: ```{description}```"
123
+ # " Doc description: ```{doc_description}```"
124
+ # " Above: ```{above}```"
125
+ # " Before: ```{before}```"
126
+ # " After: ```{after}```"
127
+ # )
128
+
129
+ # prompt = PromptTemplate(template=template, input_variables=['description', 'doc_description', 'above', 'before', 'after'])
130
+ # llm_chain = LLMChain(llm=opensource_llm, prompt=prompt)
131
+ # response = llm_chain.run({'description': task['description'], 'doc_description': task['doc_description'],
132
+ # 'above': task['above'], 'before': task['before'], 'after': task['after']})
133
+ # p = response.choices[0].message.content
134
+ # return p
135
+
136
def get_public_paragraph(task: dict) -> str:
    """
    Generate a paragraph directly with the LLM (no private/wiki grounding).

    The prompt embeds the task's 'description', 'doc_description', 'above',
    'before' and 'after' entries so the generated paragraph fits its position
    in the document hierarchy.

    Args:
        task: mapping with 'description', 'doc_description', 'above', 'before', 'after'.

    Returns:
        The generated paragraph text.
    """
    # Fixed: removed a leftover debug `print(task)`.
    llm = openai_llm
    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    p = llm.invoke(template)

    return p
167
+
168
+
169
def create_index(wikilist: [str]):
    """
    Build a Chroma index from the contents of the given Wikipedia page titles.

    Pages that fail to fetch (the fetcher returns an error string) are skipped.

    Args:
        wikilist: list of Wikipedia page titles.

    Returns:
        A Chroma index built from the split paragraph texts.

    Raises:
        ValueError: when none of the titles could be fetched.
    """
    fetch = Wiki().fetch

    # Fixed: the original called fetch(title) twice per title (once in the filter,
    # once to keep the value), doubling the network calls.
    fetched = [(title, fetch(title)) for title in wikilist]
    pages = [(title, page) for title, page in fetched if type(page) != str]

    texts = []
    chunk = 800
    for title, page in pages:
        texts.append(WikiPage(title=title, fulltext=page.page_content))

    if not texts:
        # Fixed: the original raised a bare IndexError on texts[0] in this case.
        raise ValueError("create_index: no wikipedia page could be fetched")

    doc_splitter = CharacterTextSplitter(
        separator=".",
        chunk_size=chunk,
        chunk_overlap=100,
        length_function=len,
    )

    # NOTE(review): only the FIRST fetched page is indexed even though all pages
    # are wrapped above — confirm whether all of `texts` should be indexed.
    paragraphs = texts[0].get_paragraphs(chunk=chunk)  # fixed: use `chunk`, not a hard-coded 800

    split_texts = []
    for p in paragraphs:
        split_texts += doc_splitter.split_text(p)

    # Sanity checks on the splitter output (kept from the original).
    for split_text in split_texts:
        assert type(split_text) == str
        assert 0 < len(split_text) < 2 * 500

    wiki_index = Chroma.from_texts(split_texts)

    return wiki_index
201
+
202
+
203
def get_wiki_paragraph(wiki_index, task: dict) -> str:
    """
    Generate a paragraph for the task, grounded in content retrieved from a wiki index.

    The task is first rewritten into a draft paragraph (get_public_paragraph) that
    serves as the semantic-search query; the retrieved page contents are injected
    into the generation prompt.

    Args:
        wiki_index: index searchable via semantic_search.
        task: mapping with 'description', 'doc_description', 'above', 'before', 'after'.

    Returns:
        The generated paragraph text.
    """
    task_description = get_public_paragraph(task)
    wiki_paragraphs = semantic_search(wiki_index, task_description)
    # Fixed: the separator was the literal string "/n/n" instead of blank lines "\n\n".
    text_content = ""
    for p in wiki_paragraphs:
        text_content += p.page_content + "\n\n"

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    # Fixed: use .invoke() like the rest of the module — calling the LLM object
    # directly is deprecated in recent LangChain releases.
    p = llm.invoke(template)

    return p
242
+
243
+
244
+ # def get_private_paragraph_open_source(texts, task: {}) -> str:
245
+ # """useful to get a summary in one line from wiki index"""
246
+
247
+ # text_content = ""
248
+ # for t in texts:
249
+ # text_content += t + "/n/n"
250
+
251
+ # template = ("\n"
252
+ # " Your task consists in generating a paragraph"
253
+ # " whose description is delimited by triple backticks\n"
254
+ # " The text generation is based in the documents provided in these sections \n"
255
+ # " delimited by by triple backticks (text_content)\n"
256
+ # " The paragraph belongs at the top level of the hierarchy to a document"
257
+ # " whose description is delimited by triple backticks (doc_decription)\n"
258
+ # " Make sure that the paragraph relates the top level of the document\n"
259
+ # " \n"
260
+ # " The paragraph belongs to a higher paragraph in the hierarchy"
261
+ # " whose description is delimited by triple backticks (above)\n"
262
+ # " Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
263
+ # " \n"
264
+ # " The paragraphs comes after previous paragraphs"
265
+ # " whose description is delimited by triple backticks (before)\n"
266
+ # " Make sure that the paragraph relates with previous paragraph without any repetition\n"
267
+ # " \n"
268
+ # " The paragraphs comes before next paragraphs"
269
+ # " whose description is delimited by triple backticks (after)\n"
270
+ # " Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
271
+ # " description: ```{description}```"
272
+ # " text_content: ```{text_content}```"
273
+ # " doc_description: ```{doc_description}```"
274
+ # " above: ```{above}```"
275
+ # " before: ```{before}```"
276
+ # " after: ```{after}```")
277
+
278
+ # prompt = PromptTemplate(template=template, input_variables=['description', 'text_content', 'doc_description', 'above', 'before', 'after'])
279
+ # llm_chain = LLMChain(llm=opensource_llm, prompt=prompt)
280
+ # response = llm_chain.run({'description': task['description'], 'text_content': text_content, 'doc_description': task['doc_description'],
281
+ # 'above': task['above'], 'before': task['before'], 'after': task['after']})
282
+ # p = response.choices[0].message.content
283
+
284
+
285
def get_private_paragraph(texts, task: dict) -> str:
    """
    Generate a paragraph for the task, grounded in the supplied private texts.

    Args:
        texts: iterable of text snippets to ground the generation on.
        task: mapping with 'description', 'doc_description', 'above', 'before', 'after'.

    Returns:
        The generated paragraph text.
    """
    # Fixed: the separator was the literal string "/n/n" instead of blank lines "\n\n".
    text_content = ""
    for t in texts:
        text_content += t + "\n\n"

    template = (f"\n"
                f" Your task consists in generating a paragraph\\n"
                f" whose description is delimited by triple backticks: ```{task['description']}```\n"
                f"\n"
                f" The text generation is based in the documents provided in these sections \n"
                f" delimited by by triple backticks: ``` {text_content}``` \n"
                f" The paragraph belongs at the top level of the hierarchy to a document \\n"
                f" whose description is delimited by triple backticks: ``` {task['doc_description']}```\n"
                f" Make sure that the paragraph relates the top level of the document\n"
                f" \n"
                f" The paragraph belongs to a higher paragraph in the hierarchy \\n"
                f" whose description is delimited by triple backticks: ``` {task['above']}```\n"
                f" Make sure that the paragraph relates with the paragraph in the hierarchy of the document\n"
                f" \n"
                f" The paragraphs comes after previous paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['before']}```\n"
                f" Make sure that the paragraph relates with previous paragraph without any repetition\n"
                f" \n"
                f" The paragraphs comes before next paragraphs \\n"
                f" whose description is delimited by triple backticks: ``` {task['after']}```\n"
                f" Make sure that the paragraph prepares the transition to the next paragraph without any repetition\n"
                f" \n"
                f" \n"
                f"\n"
                f" ")

    llm = openai_llm
    p = llm.invoke(template)

    return p
322
+
323
def summarize_paragraph_v2(prompt: str, title_doc: str = '', title_para: str = ''):
    """
    Summarize a paragraph in English with gpt-3.5-turbo-16k.

    The paragraph's location is taken as the text before the first " :" in *prompt*.
    (Fixed: the original placed this docstring after two statements, where it was
    a no-op string expression rather than a docstring.)

    Args:
        prompt: the paragraph text, prefixed with its location and " :".
        title_doc: title of the enclosing document.
        title_para: title of the paragraph.

    Returns:
        The stripped summary text returned by the model.
    """
    max_tokens = 850
    location_of_the_paragraph = prompt.split(" :")[0]
    task = (f"Your task consists in summarizing in English the paragraph of the document untitled ```{title_doc}``` located in the ```{location_of_the_paragraph}``` section of the document."
            f"The paragraph title is ```{title_para}```."
            f"Your response shall be concise and shall respect the following format:"
            f"<summary>"
            f"If you see that the summary that you are creating will not respect ```{max_tokens}``` tokens, find a way to make it shorter.")
    generation = openai.chat.completions.create(model="gpt-3.5-turbo-16k", messages=[{"role": "system", "content": task}, {"role": "user", "content": prompt}])
    res = generation.choices[0].message.content
    print("****************")
    print(res)
    print("----")
    return str(res).strip()
src/llm/llms.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_openai import OpenAI
from transformers import AutoModelForCausalLM
import os

# Allow the HuggingFace tokenizers library to use parallel worker threads.
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Fall back to a local (git-ignored, see .gitignore) config_key.py when the
# OpenAI key is not already exported in the environment.
if not "OPENAI_API_KEY" in os.environ:
    from config_key import OPENAI_API_KEY

    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

# Shared deterministic (temperature=0) completion LLM used across the project.
openai_llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")

# opensource_llm = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf") #LAMA MODEL
src/model/block.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Block:
    """A flat, serializable record describing one indexed chunk of a document.

    Blocks carry the chunk's text (optionally with a French variant), its position
    in the document hierarchy (index/rank/level), any special actions attached to
    it, and a similarity distance filled in by retrieval.
    """

    # Attribute names serialized verbatim by to_dict / read back by from_dict.
    _FIELDS = ('doc', 'title', 'title_fr', 'content', 'content_fr',
               'index', 'rank', 'level', 'distance')

    def __init__(self, doc: str = '', title: str = '', content: str = '', content_fr: str = '',
                 index: str = '', rank: int = 0, level: int = 0, distance: float = 99999):
        self.doc = doc
        self.title = title
        self.title_fr = ""
        self.content = content
        self.content_fr = content_fr
        self.specials = []
        self.index = index
        self.rank = rank
        self.level = level
        self.distance = distance

    def to_dict(self) -> {}:
        """Serialize the block to a flat dict; specials become special_0..special_N
        plus a specials_len count."""
        block_dict = {name: getattr(self, name) for name in self._FIELDS}
        for position, special in enumerate(self.specials):
            block_dict['special_' + str(position)] = special
        block_dict['specials_len'] = len(self.specials)
        return block_dict

    def from_dict(self, block_dict: {}):
        """Populate this block from a dict produced by to_dict and return self."""
        for name in self._FIELDS:
            setattr(self, name, block_dict[name])
        self.specials = [block_dict['special_' + str(position)]
                         for position in range(block_dict['specials_len'])]
        return self

    @property
    def distance_str(self) -> str:
        """The distance rendered with two decimal places."""
        return format(self.distance, '.2f')
src/model/container.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.model.paragraph import Paragraph
2
+ from src.model.block import Block
3
+
4
INFINITE = 99999  # Sentinel "very large" heading level used in create_children comparisons.
5
+
6
+
7
class Container:
    """A node of the document hierarchy: a title, directly-attached paragraphs,
    and child containers built from lower-level headings."""

    def __init__(self, paragraphs: [Paragraph], title: Paragraph = None, level: int = 0, index: [int] = None,
                 father=None, id_=0):
        if index is None:
            index = []
        self.level = level
        self.title = title
        self.paragraphs = []
        self.children = []
        self.index = index
        self.father = father  # if not father, then the container is at the top of the hierarchy
        # NOTE(review): .structure reads self.rank but the original never assigned it
        # (AttributeError at runtime); defaulting to 0 — confirm the intended value.
        self.rank = 0
        # Composite id: literal '1' + father's id + own child id.
        # Fixed: the original read father.id_ unconditionally and crashed when
        # father is None (the documented top-of-hierarchy case).
        father_part = str(father.id_) if father is not None else ''
        self.id_ = int(str(1) + father_part + str(id_))
        if paragraphs:
            self.paragraphs, self.children = self.create_children(paragraphs, level, index)
            self.blocks = self.get_blocks()
            self.normals, self.comments, self.tasks = self.sort_paragraphs()

    @property
    def text(self):
        """Full recursive text of this container, headings included."""
        text = ""
        if self.title:
            text = "Titre " + str(self.level) + " : " + self.title.text + '\n'
        for p in self.paragraphs:
            text += p.text + '\n'
        for child in self.children:
            text += child.text
        return text

    @property
    def text_chunks(self, chunk=500):
        """Greedy ~`chunk`-char chunks of the attached paragraphs, followed by
        the children's chunks (depth-first)."""
        text_chunks = []
        text_chunk = ""
        for p in self.paragraphs:
            if chunk < len(text_chunk) + len(p.text):
                text_chunks.append(text_chunk)
                # Fixed: the original reset the buffer to "" here, silently dropping
                # the paragraph that triggered the overflow.
                text_chunk = p.text
            else:
                text_chunk += " " + p.text
        if text_chunk and not text_chunk.isspace():
            text_chunks.append(text_chunk)
        for child in self.children:
            text_chunks += child.text_chunks
        return text_chunks

    def get_blocks(self):
        """Collect this container's Block (title + non-blank content, with
        '##### '-prefixed lines routed to specials) plus the children's blocks."""
        block = Block(level=self.level, index=self.index)
        if self.title:
            block.title = self.title.text
        for p in self.paragraphs:
            if not p.blank:
                if p.text.startswith('##### '):
                    # NOTE(review): lstrip strips any run of '#' and ' ' characters,
                    # not just the exact '##### ' prefix — confirm before actions
                    # may start with '#' or spaces.
                    special_action = p.text.lstrip('##### ')
                    block.specials.append(special_action)
                else:
                    block.content += p.text
        blocks = [block] if block.content or block.specials else []
        for child in self.children:
            blocks += child.blocks
        return blocks

    def create_children(self, paragraphs: [Paragraph], level: int, index: [int]) -> ([Paragraph], []):
        """
        creates children containers or directly attached content
        and returns the list of containers and contents of level+1
        :return:
            [Content or Container]
        """
        attached_paragraphs = []
        container_paragraphs = []
        container_title = None
        children = []
        in_children = False
        child_id = 0
        level = INFINITE

        while paragraphs:
            p = paragraphs.pop(0)
            if not in_children and not p.is_structure:
                # Plain text before the first heading stays attached to this container.
                attached_paragraphs.append(p)
            else:
                in_children = True
                if p.is_structure and p.level <= level:  # if p is higher in hierarchy, then the child is completed
                    if container_paragraphs or container_title:
                        if level <= len(index):
                            index = index[:level]
                            index[-1] += 1
                        else:
                            for i in range(level - len(index)):
                                index.append(1)
                        children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
                        child_id += 1
                        container_paragraphs = []
                    container_title = p
                    level = p.level

                else:  # p is normal text or strictly lower in hierarchy, then the child continues to grow
                    container_paragraphs.append(p)

        # Flush the last child being accumulated when the input runs out.
        if container_paragraphs or container_title:
            if level <= len(index):
                index = index[:level]
                index[-1] += 1
            else:
                for i in range(level - len(index)):
                    index.append(1)
            children.append(Container(container_paragraphs, container_title, level, index, self, child_id))
            child_id += 1

        return attached_paragraphs, children

    @property
    def structure(self):
        """Flat list of tree-view node dicts for this container, its paragraphs,
        and (recursively) its children."""
        self_structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': True,
            'children': [p.id_ for p in self.paragraphs] + [child.id_ for child in self.children],
            'canRename': True,
            'data': {},
            'level': self.level,
            'rank': self.rank,
            'title': self.title.text if self.title else 'root'
        }}
        paragraphs_structure = [p.structure for p in self.paragraphs]
        structure = [self_structure] + paragraphs_structure
        for child in self.children:
            structure += child.structure
        return structure

    def sort_paragraphs(self) -> ([Paragraph], [Paragraph], [Paragraph]):
        """Split attached paragraphs into (normals, comments, tasks) by their type.

        Fixed: the original wrote mapping(p.type) — calling the dict — which raised
        TypeError on the first paragraph; it must be an indexing operation.
        """
        mapping = {'normal': [], 'comment': [], 'task': []}
        for p in self.paragraphs:
            mapping[p.type].append(p)
        return mapping['normal'], mapping['comment'], mapping['task']
src/model/doc.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import docx
2
+
3
+ from src.model.container import Container
4
+ from src.model.paragraph import Paragraph
5
+
6
+
7
class Doc:
    """In-memory representation of a .docx document: wraps python-docx, builds the
    container hierarchy from the paragraphs and exposes indexed blocks."""

    def __init__(self, path='', id_=None):
        self.xdoc = docx.Document(path)
        self.title = path.split('/')[-1]
        # NOTE(review): the id_ parameter is accepted but ignored; the runtime
        # object id is used instead — confirm intended.
        self.id_ = id(self)
        self.path = path
        paragraphs = [Paragraph(xp, self.id_, i) for (i, xp) in enumerate(self.xdoc.paragraphs)]
        self.container = Container(paragraphs, father=self, level=0)
        self.blocks = self.get_blocks()
        # NOTE(review): Container (as visible in this commit) defines neither
        # .one_liner, .containers nor .get_task — this line looks stale and would
        # raise AttributeError; confirm against the real Container implementation.
        self.tasks = [c.get_task(self.container.one_liner) for c in self.container.containers if c.task]

    @property
    def structure(self):
        """Tree structure of the document, delegated to the root container."""
        return self.container.structure

    def get_blocks(self):
        """Collect the container's blocks, stamp each with the document title,
        stringify its index, and drop the level-0 (root) blocks.
        """

        def from_list_to_str(index_list):
            # [1, 2, 3] -> "1.2.3"
            index_str = str(index_list[0])
            for el in index_list[1:]:
                index_str += '.' + str(el)
            return index_str

        blocks = self.container.blocks
        # Fixed: the original removed level-0 blocks from `blocks` while iterating
        # the same list, which silently skipped the element following each removal
        # (leaving it without .doc and with an unconverted list index).
        for block in blocks:
            block.doc = self.title
            block.index = from_list_to_str(block.index)
        return [block for block in blocks if block.level != 0]
+ """
54
+
src/model/paragraph.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import string
2
+
3
INFINITE = 10000  # Sentinel heading level for paragraphs whose style is not a numbered title.
4
+
5
+
6
class Paragraph:
    """Wraps a python-docx paragraph with an id, a heading level derived from its
    style name, and a type used to sort it into normals/comments/tasks."""

    def __init__(self, xparagraph, doc_id: int, id_: int):
        # Underlying python-docx paragraph object.
        self.xparagraph = xparagraph
        # Composite id: literal '2' + owning document id + paragraph rank.
        self.id_ = int(str(2) + str(doc_id) + str(id_))
        self.level = self.get_level_from_name()
        # "Structure" paragraphs are headings (their style encodes a level < INFINITE).
        self.is_structure = self.level < INFINITE
        self.text = self.xparagraph.text
        # NOTE(review): get_type() is not defined anywhere in this file as committed,
        # so instantiation would raise AttributeError — confirm it exists elsewhere
        # (Container.sort_paragraphs expects 'normal' | 'comment' | 'task').
        self.type = self.get_type()

    @property
    def structure(self):
        # Tree-view node dict for this paragraph (leaf: no children, not a folder).
        structure = {str(self.id_): {
            'index': str(self.id_),
            'canMove': True,
            'isFolder': False,
            'children': [],
            'title': self.text,
            'canRename': True,
            'data': {},
            'level': self.level,
        }}
        return structure

    @property
    def blank(self):
        """
        checks if the paragraph is blank: i.e. it brings some signal (it may otherwise be ignored)
        """
        # Blank means: no ASCII letter anywhere in the text (newlines removed first).
        text = self.text.replace('\n', '')
        return set(text).isdisjoint(string.ascii_letters)

    def get_level_from_name(self) -> int:
        """Derive the heading level from the docx style name, e.g. '...Titre3' -> 3;
        INFINITE when the style is not a numbered '.Titre' heading."""
        style_name = self.xparagraph.style.name
        level = INFINITE
        if '.Titre' in style_name:
            suffix = style_name[-1]
            try:
                level = int(suffix)
            except:
                # NOTE(review): bare except kept byte-identical; it only guards the
                # int() conversion of a non-digit suffix.
                pass
        return level
49
+
50
+
src/reader/reader_for_requirements.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import docx
2
+ import os
3
+ from docx.document import Document as _Document
4
+ from src.domain.requirements_paragraphs import Requirement_Paragraph
5
+ from docx.oxml.text.paragraph import CT_P
6
+ from docx.oxml.table import CT_Tbl
7
+ from docx.table import _Cell, Table
8
+ from docx.text.paragraph import Paragraph
9
+
10
class WordReader:
    """Parses a .docx file into Requirement_Paragraph objects, preserving true
    document order (paragraphs and tables interleaved) and estimating page numbers
    from a characters-per-page heuristic."""

    def __init__(self, path):
        # Path to the .docx file; parsing happens eagerly at construction time.
        self.path = path
        self.paragraphs = self.get_paragraphs()

    def iter_block_items(self, parent):
        """Yield Paragraph and Table objects of *parent* (a Document or table cell)
        in document order — which doc.paragraphs alone cannot provide.

        Raises:
            ValueError: when *parent* is neither a Document nor a _Cell.
        """
        if isinstance(parent, _Document):
            parent_elm = parent.element.body
        elif isinstance(parent, _Cell):
            parent_elm = parent._tc
        else:
            raise ValueError("Unsupported parent type")

        for child in parent_elm.iterchildren():
            if isinstance(child, CT_P):
                yield Paragraph(child, parent)
            elif isinstance(child, CT_Tbl):
                yield Table(child, parent)

    def get_paragraphs(self):
        """Parse the document into a list of Requirement_Paragraph objects
        (tables are flattened into pseudo-paragraphs).

        Raises:
            FileNotFoundError: when self.path does not exist.
            ValueError: wrapping any python-docx parsing error.
        """
        if not os.path.exists(self.path):
            raise FileNotFoundError(f"The file {self.path} does not exist.")
        try:
            doc = docx.Document(self.path)
            paragraph_objects = []
            paragraph_id = 0
            page_id = 1  # Example page ID
            total_characters = 0
            for block in self.iter_block_items(doc):
                if isinstance(block, Paragraph):
                    paragraph_info = self.extract_paragraph_info(block)
                    if paragraph_info:  # Only append if paragraph is not empty
                        page_id = self.estimate_page_number(total_characters)
                        p_obj = Requirement_Paragraph(text=paragraph_info['text'], font_style=paragraph_info['style'], id_=paragraph_id, page_id=page_id)
                        #print(f"Found paragraph: {paragraph_info['style']}...") # DEBUG
                        paragraph_objects.append(p_obj)
                        paragraph_id += 1
                        total_characters += len(paragraph_info['text'])
                elif isinstance(block, Table):
                    # Tables reuse the page_id of the last seen paragraph.
                    table_paragraph, table_style = self.table_to_paragraph(block)
                    if table_paragraph.strip():  # Check if table paragraph is not empty
                        #print(f"Found table. Predominant style: {table_style}") # DEBUG
                        p_obj = Requirement_Paragraph(text=table_paragraph, font_style=table_style, id_=paragraph_id, page_id=page_id)
                        paragraph_objects.append(p_obj)
                        paragraph_id += 1
            return paragraph_objects
        except Exception as e:
            raise ValueError(f"Error reading the .docx file. Original error: {str(e)}")

    def determine_predominant_style(self, styles):
        """Return the most frequent style name in *styles* ("None" when empty)."""
        # Count the occurrences of each style
        style_counts = {}
        for style in styles:
            if style in style_counts:
                style_counts[style] += 1
            else:
                style_counts[style] = 1

        # Find the style with the highest count
        predominant_style = max(style_counts, key=style_counts.get, default="None")
        return predominant_style

    def estimate_page_number(self, total_characters):
        """Rough 1-based page estimate assuming ~2000 characters per page."""
        avg_chars_per_page = 2000
        return total_characters // avg_chars_per_page + 1

    def extract_paragraph_info(self, paragraph):
        """Return {'text', 'style', 'runs'} for *paragraph*, or None when blank."""
        # Check if paragraph is empty
        if not paragraph.text.strip():
            return None  # Return None for empty paragraphs

        paragraph_style = paragraph.style.name if paragraph.style else 'None'

        # Per-run formatting details (font, size, bold/italic/underline).
        runs = []
        for run in paragraph.runs:
            run_details = {
                'text': run.text,
                'font_name': run.font.name,
                'font_size': run.font.size.pt if run.font.size else None,
                'bold': run.bold,
                'italic': run.italic,
                'underline': run.underline
            }
            runs.append(run_details)

        return {
            'text': paragraph.text,
            'style': paragraph_style,
            'runs': runs
        }

    def table_to_paragraph(self, table):
        """Flatten *table* into a single string (" | "-separated cells, one row per
        line) plus the predominant paragraph style found in its cells."""
        table_text = ""
        table_styles = set()

        for row in table.rows:
            for cell in row.cells:
                cell_text = ""
                for paragraph in cell.paragraphs:
                    paragraph_style = paragraph.style.name if paragraph.style else 'None'
                    table_styles.add(paragraph_style)

                    for run in paragraph.runs:
                        cell_text += run.text

                    cell_text += " "
                table_text += cell_text.strip() + " | "  # Add a separator for cells
            table_text = table_text.strip() + "\n"  # Add a newline for rows

        predominant_style = self.determine_predominant_style(table_styles)

        return table_text.strip(), predominant_style

    def print_paragraphs_and_tables(self):
        """Debug helper that prints the parsed items.

        NOTE(review): get_paragraphs() returns Requirement_Paragraph objects, but
        this method treats each item as a dict ('paragraph' in item) — it looks
        stale and will only hit the except branch; confirm before relying on it.
        """
        try:
            print("start")
            doc_items = self.get_paragraphs()
            for item in doc_items:
                if 'paragraph' in item:
                    print("Paragraph:", item['paragraph']['text'])
                elif 'table' in item:
                    print("Table:")
                    for row in item['table']:
                        for cell in row:
                            for paragraph in cell:
                                print(" Cell Paragraph:", paragraph['text'])
                print('-' * 40)  # separator for clarity

        except Exception as e:
            print(f"Error: {str(e)}")
src/retriever/retriever.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.domain.block import Block
2
+ from src.domain.doc import Doc
3
+ from src.llm.llm_tools import summarize_paragraph_v2
4
+ import gradio as gr
5
+
6
class Retriever:
    """
    The Retriever class is responsible for processing and summarizing documents.
    It supports operations such as summarizing individual blocks of text, organizing
    text into a hierarchy, and conducting similarity searches within a collection of documents.

    Attributes:
        collection: A collection object where summaries and metadata are stored.
    """

    def __init__(self, doc: Doc = None, collection=None):
        """
        Initializes the Retriever with an optional document to ingest.

        Args:
            doc: A document whose blocks are summarized and stored; when None, the
                 retriever only wraps an already-populated collection.
            collection: A collection object to store summaries and metadata.
        """
        self.collection = collection
        if doc is not None:
            blocks_good_format = doc.blocks_requirements  # List of Block objects from the document.
            gr.Info("Please wait while the database is being created")

            # Process each block in the document.
            for block in blocks_good_format:
                print(f"block index : {block.index}")
                if len(block.content) > 4500:
                    # Over-long blocks are split and each piece summarized separately.
                    new_blocks = block.separate_1_block_in_n(max_size=4500)
                    for new_block in new_blocks:
                        self._summarize_and_store(new_block, doc.title, block.title)
                else:
                    self._summarize_and_store(block, doc.title, block.title)

            # Summarize blocks by their hierarchy level after individual processing.
            self.summarize_by_hierarchy(blocks_good_format, doc.title)
            gr.Info(f"The collection {collection.name} has been added to the database")

    def _summarize_and_store(self, block, doc_title, para_title):
        """Summarize one block and add (summary, id, metadata) to the collection.

        Extracted helper: the original duplicated this logic in both branches of
        the ingestion loop.
        """
        summary = summarize_paragraph_v2(prompt=block.content, title_doc=doc_title, title_para=para_title)
        # Keep only the text after the "<summary>" tag when the model echoes the format.
        if "<summary>" in summary:
            summary = summary.split("<summary>")[1]
        self.collection.add(
            documents=[summary],
            ids=[block.index],
            metadatas=[block.to_dict()]
        )

    def summarize_by_hierarchy(self, blocks, doc_title):
        """
        Summarizes blocks based on their hierarchical levels.

        Args:
            blocks: A list of Block objects to be summarized.
            doc_title: The title of the document being processed.
        """
        hierarchy = self.create_hierarchy(blocks)
        deepest_blocks_indices = self.find_deepest_blocks(blocks)
        print("Hierarchy levels identified:", hierarchy.keys())
        # Fixed: the original iterated the set of index *strings* and printed
        # block.index, which resolves to the bound str.index method — printing
        # method objects rather than the indices.
        print("Deepest block indices:", sorted(deepest_blocks_indices))

        for level, level_blocks in hierarchy.items():
            # Summarize only if the level has more than one block and contains deepest blocks.
            print(level)
            print(level_blocks)
            print(deepest_blocks_indices)
            print(len(level_blocks))
            if len(level_blocks) > 1 and any(block.index in deepest_blocks_indices for block in level_blocks):
                level_content = " ".join(block.content for block in level_blocks)

                print(f"Summarizing level {level} with content from blocks: {[block.index for block in level_blocks]}")
                level_summary = summarize_paragraph_v2(prompt=level_content, title_doc=doc_title, title_para=f"Summary of section : {level}")

                level_summary_id = f"summary_{level}"
                # Build metadata from the first block of the level; reuse the join
                # computed above (the original joined the contents a second time).
                first_block = level_blocks[0]
                combined_block = Block(
                    doc=first_block.doc,
                    title=first_block.title,
                    content=level_content,
                    index=first_block.index,
                    rank=first_block.rank,
                    level=first_block.level,
                    distance=first_block.distance
                )

                self.collection.add(
                    documents=[level_summary],
                    ids=[level_summary_id],
                    metadatas=[combined_block.to_dict()]  # Pass the combined block metadata
                )
                print(f"Added summary for level {level} to the collection.")
            else:
                # Skip summarization for levels that are deepest blocks.
                print(f"Skipping level {level} as it is deepest blocks.")

    def create_hierarchy(self, blocks):
        """
        Creates a hierarchical structure of the blocks based on their indices.

        Args:
            blocks: A list of Block objects to be organized into a hierarchy.

        Returns:
            A dictionary mapping each hierarchy level (index prefix) to its blocks.
        """
        hierarchy = {}
        for block in blocks:
            for level in self.extract_levels(block.index):
                hierarchy.setdefault(level, []).append(block)
        return hierarchy

    def extract_levels(self, index):
        """
        Extracts all hierarchical levels from a block index.

        Args:
            index: The index string of a block (e.g. "1.2.3").

        Returns:
            A list of level prefixes (e.g. ["1", "1.2", "1.2.3"]).
        """
        parts = index.split('.')
        return ['.'.join(parts[:i]) for i in range(1, len(parts) + 1)]

    def find_deepest_blocks(self, blocks):
        """
        Identifies the deepest blocks in the hierarchy.

        Args:
            blocks: A list of Block objects.

        Returns:
            A set of index strings of blocks that no other block's index extends.
        """
        deepest_blocks = set()
        block_indices = {block.index for block in blocks}
        for block in blocks:
            # A block is considered deepest if no other block's index extends it.
            if not any(b_index != block.index and b_index.startswith(block.index + '.') for b_index in block_indices):
                deepest_blocks.add(block.index)
        return deepest_blocks

    def similarity_search(self, queries: str) -> {}:
        """
        Performs a similarity search in the collection based on given queries.

        Args:
            queries: A string or list of strings representing the query or queries.

        Returns:
            A list of Block objects most similar to the queries, with their
            retrieval distances filled in.
        """
        # Query the collection and rebuild Block objects from stored metadata.
        res = self.collection.query(query_texts=queries, n_results=5)
        block_dict_sources = res['metadatas'][0]
        distances = res['distances'][0]
        blocks = []
        for bd, d in zip(block_dict_sources, distances):
            b = Block().from_dict(bd)
            b.distance = d
            blocks.append(b)

        return blocks
+ return blocks
198
+
src/tools/doc_tools.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import os
3
+ import docx
4
+ import docx.oxml.ns as ns
5
+
6
def get_positions(xml_file):
    """Extract the first numeric cx/cy (width/height, in EMU) values from raw XML.

    Args:
        xml_file: the XML of a document part, as a string.

    Returns:
        (width, height) as strings, or None for a dimension that has no
        numeric cx=/cy= attribute. (The original manual split-scan returned
        the raw split list in that case, which no caller could use.)
    """
    import re
    # A cx="123" / cy="456" attribute with a purely numeric value.
    width_match = re.search(r'cx="(\d+)"', xml_file)
    height_match = re.search(r'cy="(\d+)"', xml_file)
    width = width_match.group(1) if width_match else None
    height = height_match.group(1) if height_match else None
    return width, height
26
+
27
def convert_to_png(imageslist):
    """Convert every non-PNG image in *imageslist* to PNG, in place.

    Each converted source file is deleted and its list entry replaced by
    the new .png path. Fixes from the original: `list.index()` (wrong with
    duplicate paths) replaced by enumerate; `split('.')[0]` (breaks on
    paths containing extra dots, e.g. './dir/a.jpg') replaced by
    os.path.splitext; the PIL image is now closed via context manager.

    Args:
        imageslist: list of image file paths.

    Returns:
        The same list, with converted entries pointing at the .png files.
    """
    for pos, image in enumerate(imageslist):
        if image.endswith('.png'):
            continue
        png_path = os.path.splitext(image)[0] + '.png'
        with Image.open(image) as im:
            im.save(png_path)
        imageslist[pos] = png_path
        os.remove(image)
    return imageslist
36
+
37
+
38
def get_difference_with_template(styles_used_in_doc, template):
    """Return document styles absent from the template, without duplicates.

    Args:
        styles_used_in_doc: styles found in the document (objects with .name).
        template: template document whose styles.names lists known style names.

    Returns:
        The first occurrence of each document style whose name the template
        does not declare.
    """
    template_names = template.styles.names
    missing = []
    seen_names = set()
    for style in styles_used_in_doc:
        if style.name in template_names or style.name in seen_names:
            continue
        missing.append(style)
        seen_names.add(style.name)
    return missing
46
+
47
+
48
def update_table_of_contents(doc):
    """Mark *doc* so Word refreshes all fields (incl. the TOC) on next open.

    Appends a <w:updateFields w:val="true"/> element to the document's
    settings part.
    """
    update_flag = docx.oxml.shared.OxmlElement('w:updateFields')
    update_flag.set(ns.qn('w:val'), 'true')
    doc.settings.element.append(update_flag)
58
+
59
+
60
def left_part_until_number(s):
    """Return the prefix of *s* before its first digit, or None if *s* contains no digit."""
    digit_pos = next((pos for pos, ch in enumerate(s) if ch.isdigit()), None)
    return None if digit_pos is None else s[:digit_pos]
65
+
66
def get_title(path) -> str:
    """Return the file-name component of *path*.

    Handles both '/' and '\\' separators; a backslash split on the raw
    path wins when both are present (matching the original behavior).
    """
    res = path
    if '/' in path:
        res = path.rsplit('/', 1)[-1]
    if '\\' in path:
        res = path.rsplit('\\', 1)[-1]
    return res
src/tools/index_creation.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.domain.container import Container
2
+
3
INFINITE = 99999  # sentinel level for containers whose depth is unknown

def create_dic_levels(c, dict_of_levels=None):
    """Build a mapping level -> starting index template for *c*'s subtree.

    Args:
        c: root Container; a level-0 root gets the index template [0].
        dict_of_levels: recursion accumulator. A fresh dict is created when
            omitted — the original used a mutable default argument ({}),
            which leaked state between top-level calls.

    Returns:
        Dict mapping each level found in the tree to its index template
        (level n starts as [1] * n); the INFINITE sentinel maps to [1].
    """
    if dict_of_levels is None:
        dict_of_levels = {}
    if c.level == 0:
        dict_of_levels[c.level] = [0]
    for child in c.children:
        if child.level not in dict_of_levels:
            dict_of_levels[child.level] = [1 for _ in range(child.level)]
        create_dic_levels(child, dict_of_levels)
    if INFINITE in dict_of_levels:
        dict_of_levels[INFINITE] = [1]
    return dict_of_levels
15
+
16
+
17
def create_good_indexes(c:Container, dict_of_levels : dict):
    """Recursively assign hierarchical indexes to *c* and its subtree.

    Args:
        c: container whose ``index`` is set from the running counter for
            its level.
        dict_of_levels: maps each level to its current index template;
            mutated in place as numbering advances.
    """
    actual_level = c.level
    # Snapshot the current counter of this level as the container's index.
    c.index = dict_of_levels[actual_level].copy()
    actual_len = len(dict_of_levels[actual_level])
    temp_update = dict_of_levels[actual_level][-1]
    # Advance the counter for this level.
    dict_of_levels[actual_level][-1] += 1
    # Propagate the pre-increment value into deeper level templates so
    # children inherit this container's position.
    for i in dict_of_levels.values():
        if len(i) > actual_len:
            i[actual_len - 1] = temp_update
    for child in c.children:
        c_lvl = child.level
        # Reset counters for all levels at/below the child before recursing.
        for i in dict_of_levels.values():
            if len(i) > c_lvl:
                i[c_lvl:] = [1 for _ in range(len(i[c_lvl:]))]
        create_good_indexes(child, dict_of_levels)  # Apply the function recursively to all children
32
+
33
+
34
def create_good_indexes_not_ordered_titles(c:Container, dict_of_levels : dict):
    """Assign hierarchical indexes to *c*'s subtree (unordered-titles variant).

    The original body was a line-for-line copy of ``create_good_indexes``,
    and its recursive step already called ``create_good_indexes`` rather
    than itself — so the two functions were behaviorally identical.
    Delegating removes the duplicated logic while keeping the public name
    for existing callers.
    """
    create_good_indexes(c, dict_of_levels)
49
+
50
+
51
def set_good_block_indexes(c:Container):
    """Share each container's index with its blocks, normalizing zeros to ones.

    The container's index list is mutated in place (only when the container
    has blocks), and the blocks alias that same list.
    """
    for container in c.containers:
        for block in container.blocks:
            # Normalize before sharing; idempotent, so repeating per block is safe.
            for pos in range(len(container.index)):
                if container.index[pos] == 0:
                    container.index[pos] = 1
            block.index = container.index
58
+
59
+
60
def set_indexes(c:Container, path : str):
    """Compute and assign hierarchical indexes for *c*'s whole tree.

    Skips generated files and templates. When the first child sits deeper
    than the shallowest non-root level, its level is pulled up and the
    unordered-titles indexing variant is used.
    """
    if "temp/generated_files" in path or "data/templates" in path:
        return
    levels = create_dic_levels(c)
    # Re-build the dict with its keys in ascending level order.
    levels = {key: levels[key] for key in sorted(levels)}
    first_child = c.children[0] if c.children else None
    if first_child and first_child.level > min(list(levels.keys())[1:]):
        first_child.level = min(list(levels.keys())[1:])
        create_good_indexes_not_ordered_titles(c, levels)
    else:
        create_good_indexes(c, levels)
    set_good_block_indexes(c)
src/tools/list_tool.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def keep_last_occurrences(lst, key):
    """Remove, in place, every element whose key reappears later in *lst*.

    Walks backwards so the LAST occurrence of each key survives.

    Args:
        lst: list to deduplicate in place.
        key: callable extracting the deduplication key from an element.

    Returns:
        The same (mutated) list.
    """
    encountered = set()
    pos = len(lst) - 1
    while pos >= 0:
        marker = key(lst[pos])
        if marker in encountered:
            del lst[pos]
        else:
            encountered.add(marker)
        pos -= 1
    return lst
11
+
12
def delete_duplicate_styles(list_styles_to_update, different_styles):
    """Drop from *different_styles* (in place) entries already covered by a list-style update.

    An entry is a duplicate when some ``list_styles_to_update`` item targets
    the same document and its 'list_style' label (the part before ' : ')
    names the same style.

    Fix: the original removed items from ``different_styles`` while
    iterating over it, which skips the element following each removal;
    iterating over a snapshot removes every duplicate.

    Returns:
        The (mutated) ``different_styles`` list.
    """
    covered = {(s["doc"].name, s["list_style"].split(" : ")[0])
               for s in list_styles_to_update}
    for d in list(different_styles):
        if (d["doc"].name, d["style"].name) in covered:
            different_styles.remove(d)
    return different_styles
src/tools/paragraph_tools.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from xml.etree import ElementTree as ET
2
+
3
def find_list_indentation_level(para, doc):
    """Determine whether *para* is a list paragraph and its indentation level.

    Args:
        para: a python-docx paragraph (its raw XML is inspected).
        doc: wrapper exposing the document's numbering part at
            ``doc.xdoc._part.numbering_part``.

    Returns:
        (is_list, level) with level 1-based; (False, 0) when the paragraph
        is not part of a list or the document has no numbering part.

    Fixes vs. original: the numId-without-ilvl branch read ``style_Id``
    before assigning it (guaranteed UnboundLocalError); a paragraph with
    <w:numPr> but no <w:numId> implicitly returned None; the bare except
    also swallowed SystemExit/KeyboardInterrupt.
    """
    W = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    namespace = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
    xml_para = para._p.xml
    try:
        xml_numbering = doc.xdoc._part.numbering_part.element.xml
    except Exception:
        # No numbering part: nothing in this document can be a list.
        return False, 0
    root_para = ET.fromstring(xml_para)
    root_numbering = ET.fromstring(xml_numbering)
    # Top-level <w:num> entries mapping numId -> abstractNumId.
    num_entries = [item for item in root_numbering if item.tag == f"{W}num"]
    is_numPr = root_para.find(".//w:numPr", namespaces=namespace)
    is_style = root_para.find(".//w:pStyle", namespaces=namespace)
    is_numId = root_para.find(".//w:numId", namespaces=namespace)
    is_lvl = root_para.find(".//w:ilvl", namespaces=namespace)
    if is_numPr is not None:
        if is_numId is not None and is_lvl is not None:
            # Direct numbering: the explicit ilvl value is the level.
            return True, int(is_lvl.attrib[f"{W}val"]) + 1
        elif is_numId is not None and is_lvl is None:
            if is_style is None:
                # Original checked style_Id before assigning it; bail out instead.
                return False, 0
            numId = int(is_numId.attrib[f"{W}val"])
            num_entry = [item for item in num_entries
                         if item.attrib[f"{W}numId"] == str(numId)][0]
            abstract_id = num_entry.find(".//w:abstractNumId",
                                         namespaces=namespace).attrib[f"{W}val"]
            real_numID = root_numbering.find(
                f".//w:abstractNum[@w:abstractNumId='{int(abstract_id)}']",
                namespaces=namespace)
            lvl = real_numID.find(".//w:ilvl", namespaces=namespace)
            # NOTE(review): reads a 'w:ilvl' attribute off a <w:ilvl> element;
            # Word normally stores the value in 'w:val' — confirm with real docs.
            return True, int(lvl.attrib[f"{W}ilvl"]) + 1
        # numPr present but no numId: original fell through and returned
        # None, which crashed tuple-unpacking callers.
        return False, 0
    else:
        if is_style is None:
            return False, 0
        # Check whether the paragraph style is referenced in the numbering part.
        style_Id = is_style.attrib[f"{W}val"]
        is_style_in_numbering = root_numbering.find(
            f".//w:pStyle[@w:val='{style_Id}']...", namespaces=namespace)
        if is_style_in_numbering is None:
            return False, 0
        ilvl = is_style_in_numbering.attrib[f"{W}ilvl"]
        return True, int(ilvl) + 1
src/tools/pretty_print.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.domain.block import Block
2
+ from src.domain.requirements_paragraphs import Requirement_Paragraph
3
+
4
def pretty_print_block_and_indexes(b : [Block]):
    """Debug helper: print each block's index and title (or a placeholder)."""
    for block in b:
        print(f"{block.index} {block.title if block.title else '___NO TITLE__'}")
    # Trailing separator after the whole listing.
    print(f"----------------------------------")
8
+
9
def pretty_print_paragraphs(para : [Requirement_Paragraph]):
    """Debug helper: print each paragraph's level, font style and text."""
    for p in para:
        print(f"{p.level} --> {p.font_style} : {p.text}")
    # Trailing separator after the whole listing.
    print("-------------------")
src/tools/semantic_db.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from datetime import datetime
3
+
4
+ chroma_client = chromadb.Client()
5
+
6
+
7
def get_or_create_collection(coll_name: str):
    """Fetch or create the chroma collection *coll_name*.

    The first 6 characters of the name (a %m%d%H timestamp by convention)
    are stored as the collection's 'date' metadata.
    """
    creation_stamp = coll_name[:6]
    return chroma_client.get_or_create_collection(name=coll_name,
                                                  metadata={"date": creation_stamp})
11
+
12
+
13
def get_collection(coll_name: str):
    """Return the existing chroma collection *coll_name* (raises if absent)."""
    return chroma_client.get_collection(name=coll_name)
16
+
17
+
18
def reset_collection(coll_name: str):
    """Clear the contents of collection *coll_name* and return it.

    NOTE(review): ``coll.delete()`` is called with no ids or filter —
    confirm the installed chromadb version treats this as "delete all
    entries" rather than raising.
    """
    coll = chroma_client.get_collection(name=coll_name)
    coll.delete()
    return coll
22
+
23
+
24
def delete_old_collections(old=2):
    """Delete collections whose '%m%d%H' date stamp is more than *old* hours old.

    Fix: the original compared ``int(now_stamp) - old`` against the stamps,
    which produces invalid pseudo-timestamps across day/month boundaries
    (e.g. at hour 00, 070100 - 2 = 070098). Subtracting the hours as a real
    timedelta before formatting yields a valid cutoff stamp.

    NOTE: stamps carry no year, so comparisons still wrap at New Year.
    """
    from datetime import timedelta
    cutoff = datetime.now() - timedelta(hours=old)
    cutoff_stamp = int(cutoff.strftime("%m%d%H"))
    for coll in chroma_client.list_collections():
        if int(coll.metadata['date']) < cutoff_stamp:
            chroma_client.delete_collection(coll.name)
32
+
33
+
34
def add_texts_to_collection(coll_name: str, texts: [str], file: str, source: str):
    """Add *texts* (all originating from *file*) to collection *coll_name*.

    Each text gets metadata ``{file: 1, 'source': source}`` so queries can
    filter by file, and a deterministic id ``'<file>-<i>'``; pre-deleting
    those ids makes the call idempotent. Failures are best-effort: logged
    and swallowed.

    Fix: narrowed the bare ``except`` (which also swallowed SystemExit and
    KeyboardInterrupt) to ``except Exception``.
    """
    coll = chroma_client.get_collection(name=coll_name)
    metadatas = [{file: 1, 'source': source} for _ in texts]
    ids = [file+'-'+str(i) for i in range(len(texts))]
    try:
        coll.delete(ids=ids)
        coll.add(documents=texts, metadatas=metadatas, ids=ids)
    except Exception:
        print(f"exception raised for collection :{coll_name}, texts: {texts} from file {file} and source {source}")
46
+
47
+
48
def delete_collection(coll_name: str):
    """Drop collection *coll_name* from the chroma client."""
    chroma_client.delete_collection(name=coll_name)
50
+
51
+
52
def list_collections():
    """Return all collections known to the chroma client."""
    return chroma_client.list_collections()
54
+
55
+
56
def query_collection(coll_name: str, query: str, from_files: [str], n_results: int = 4):
    """Query *coll_name* for *query*, restricted to documents from *from_files*.

    Args:
        coll_name: name of the chroma collection.
        query: query text.
        from_files: non-empty list of file keys to filter on.
        n_results: requested number of results (capped by collection size).

    Returns:
        Chroma's query-result dict, or "" when the query fails (best-effort,
        mirroring the original error handling).

    Raises:
        ValueError: if *from_files* is empty. (Was an ``assert``, which is
        stripped under ``python -O``.)
    """
    if not from_files:
        raise ValueError("from_files must contain at least one file")
    coll = chroma_client.get_collection(name=coll_name)
    filters = [{file: 1} for file in from_files]
    # Single filter used directly; several are OR-combined.
    where_ = filters[0] if len(filters) == 1 else {'$or': filters}
    n_results_ = min(n_results, coll.count())

    ans = ""
    try:
        ans = coll.query(query_texts=query, n_results=n_results_, where=where_)
    except Exception:
        # Narrowed from a bare except; best-effort, log and return "".
        print(f"exception raised at query collection for collection {coll_name} and query {query} from files "
              f"{from_files}")

    return ans
src/tools/wiki.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Union
2
+
3
+ from langchain.docstore.base import Docstore
4
+ from langchain.docstore.document import Document
5
+
6
+
7
+
8
class Wiki(Docstore):
    """
    Wrapper around wikipedia API.
    """

    def __init__(self) -> None:
        """Check that wikipedia package is installed."""
        try:
            import wikipedia  # noqa: F401
        except ImportError:
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            )

    @staticmethod
    def fetch(searched_page: str) -> Union[str, Document]:
        """
        Try to fetch for wiki page.

        If page exists, return a Document with its content and URL.
        If page does not exist (or is ambiguous), return a string listing
        similar entries.
        """
        import wikipedia

        try:
            # wikipedia.set_lang("fr")
            # Fetch the page once (the original called wikipedia.page twice,
            # doubling the network round-trips).
            page = wikipedia.page(searched_page)
            result: Union[str, Document] = Document(
                page_content=page.content, metadata={"page": page.url}
            )
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            # Both branches produced the same message in the original.
            result = f"Could not find [{searched_page}]. Similar: {wikipedia.search(searched_page)}"
        return result

    @staticmethod
    def search(searched_context: str) -> [str]:
        """
        Finds wiki page title in relation with the given context.

        Fix: the original lacked @staticmethod although it takes no self,
        so instance calls bound the instance as *searched_context*.
        """
        import wikipedia

        try:
            # wikipedia.set_lang("fr")
            result = wikipedia.search(searched_context)
        except wikipedia.PageError:
            result = f"Could not find [{searched_context}]."
        return result
60
+
61
+
src/view/log_msg.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+
3
+ from config import config
4
+
5
+
6
def create_msg_from(logs: [Dict], docs) -> str:
    """Build a human-readable modification report from per-document logs.

    Args:
        logs: list of dicts keyed by document name, each holding that
            document's modification log.
        docs: documents (objects with a ``.name`` attribute) to report on.

    Returns:
        All per-document messages joined by newlines.
    """
    log_messages = []
    log_msg = config['log_msg']  # localized message fragments from config
    docs_seen = []
    msg = ''
    for doc in docs:
        for log in logs:
            if doc.name in log.keys():
                log = log[doc.name]  # narrow to this document's log entry
                if 'options_applied' in log.keys():
                    msg += log_msg['options_applied']
                    for option in log['options_applied']:
                        msg += " - " + option + "\n"
                if 'suppressed_styles' in log.keys():
                    if log['suppressed_styles']:
                        msg += log_msg['suppressed_styles']
                        for style_name in log['suppressed_styles']:
                            msg += " - " + style_name + "\n"
                    if log['modified_styles']:
                        msg += log_msg['modified_styles']
                        for style, log_s in log['modified_styles']:
                            msg += log_msg['modified_style'] + style + "\n"
                            for modif, _ in log_s:
                                msg += log_msg[modif] + ' '
                            msg += '\n'
                    if log['added_styles']:
                        msg += log_msg['added_styles']
                        for style_name in log['added_styles']:
                            msg += " - " + style_name + "\n"
                if 'style_mapping' in log.keys():
                    # NOTE(review): plain assignment discards anything
                    # accumulated in msg above — confirm this is intended.
                    msg = log['style_mapping']
                if 'list_mapping' in log.keys():
                    msg = log['list_mapping']
                if msg:
                    if doc not in docs_seen:
                        # Prefix the first message for a document with its name.
                        msg = log_msg['document'] + doc.name + '\n' + msg
                        docs_seen.append(doc)
                    log_messages.append(msg)
                    msg = ''
    log_messages_str = '\n'.join(log_messages)
    return log_messages_str
47
+
src/view/style_components.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+
4
+
5
def input_files_fn1(input_files_):
    """Return a gradio update making the output-files component visible
    once input files are present; empty update otherwise.

    NOTE(review): ``output_files_comp`` is not defined anywhere in this
    module — calling this function as-is raises NameError unless the
    component is injected at module level elsewhere. Confirm this file
    is still in use.
    """
    update_ = {
        output_files_comp: gr.update(visible=True)
    } if input_files_ else {}
    return update_
src/view/test_view.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
# Minimal gradio sandbox: an accordion plus two buttons toggling its
# open/closed state. Standalone demo, launched only when run directly.
with gr.Blocks() as test:
    list_2 = ["choix21", "choix 22", "et choix 23"]  # NOTE(review): unused in this demo
    with gr.Row():
        with gr.Accordion("See Details") as grac:
            gr.Markdown("lorem ipsum")
        hide_btn = gr.Button("hide")
        show_btn = gr.Button("show")

    def hide_fn():
        # Collapse the accordion.
        update_ = {
            grac: gr.update(open=False)
        }
        return update_

    def show_fn():
        # Expand the accordion.
        update_ = {
            grac: gr.update(open=True)
        }
        return update_

    hide_btn.click(hide_fn,
                   inputs=[],
                   outputs=[grac])
    show_btn.click(show_fn,
                   inputs=[],
                   outputs=[grac])




if __name__ == "__main__":
    test.launch()
src/view/view.py ADDED
@@ -0,0 +1,533 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from typing import Dict
3
+ import asyncio
4
+ import os
5
+ from src.control.controller import Controller
6
+ from Levenshtein import distance
7
+ from src.tools.list_tool import delete_duplicate_styles
8
+
9
+
10
+ def run(config: Dict, controller: Controller):
11
+
12
+ """
13
+ =====================================================
14
+ Global variables
15
+ ================
16
+ """
17
+ controller.clear_docs()
18
+ title = "<h1 style=text-align:center;display:block;font-size:4.5em;color:#08a2d2;font-weight:bold;margin-top:4%;padding-bottom:1%>GenProp</h1>"
19
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.orange)) as formatdoc:
20
+ gr.Markdown(title)
21
+ gr.Markdown("<p style=color:#08a2d2;font-size:1.5em;padding-bottom:2%;text-align:center>Par Hexamind</p>")
22
+ gr.Markdown("")
23
+ with gr.Row():
24
+ with gr.Column():
25
+ pass
26
+ with gr.Column(scale=10):
27
+ """
28
+ =====================================================
29
+ Input and style components
30
+ ==========================
31
+ """
32
+
33
+ gr.Markdown("<p style=font-size:1em;>Vous êtes chargé de produire une proposition commerciale</p>")
34
+
35
+
36
+ with gr.Accordion("Charger votre proposition", open=True) as input_acc:
37
+ input_files_comp = gr.File(file_count="multiple", file_types=[".docx"], label="Document")
38
+
39
+
40
+
41
+ with gr.Accordion("Appliquer les styles", open=False) as style_acc:
42
+ templates_radio = gr.Radio(
43
+ label="Templates",
44
+ choices=config['templates'],
45
+ value=config['templates'][config['default_template_index']],
46
+ )
47
+ with gr.Row():
48
+ options_btn = gr.CheckboxGroup(choices=config['options'],
49
+ label="Options",
50
+ interactive=True)
51
+ with gr.Accordion("Mapper les styles de liste", open=False) \
52
+ as list_acc:
53
+ with gr.Column(scale=2):
54
+ list_style_comps = [gr.Dropdown(visible=False, interactive=True)
55
+ for _ in range(config['max_styles'])]
56
+ with gr.Accordion("Mapper les autres styles non présents dans le template", open=False) \
57
+ as newstyles_acc:
58
+ with gr.Column(scale=2):
59
+ newstyle_comps = [gr.Dropdown(visible=False, interactive=True)
60
+ for _ in range(config['max_styles'])]
61
+
62
+ log_comp = gr.Textbox(label="Journal des modifications", visible=False)
63
+
64
+ output_styles_files_comp = gr.File(file_count="multiple", file_types=[".docx"], visible=False)
65
+
66
+ with gr.Row():
67
+ run_style_btn = gr.Button("Appliquer le template et les modifications de style", visible=False)
68
+ clear_style_btn = gr.Button("Annuler les modifications de style", visible=False)
69
+
70
+ """
71
+ ===============================================
72
+ Generation components
73
+ ======================
74
+ """
75
+ with gr.Accordion("Compléter automatiquement la proposition", open=False) as gen_acc:
76
+
77
+ generate_option_btn = gr.Radio(
78
+ label="Automatically generate a draft based on your own database",
79
+ choices=["Auto generation", "No generation"],
80
+ value="No generation",
81
+ interactive=True,
82
+ visible=False,
83
+ )
84
+
85
+ db_list_comp = gr.CheckboxGroup(
86
+ label="Base de connaissance",
87
+ info="Ces documents constituent la source de référence. Désélectionner pour qu'ils ne soient "
88
+ "pas pris en compte lors de la génération automatiqueF",
89
+ visible=True,
90
+ interactive=True,
91
+ )
92
+ db_reset_btn = gr.Button("Effacer la base de connaissance", visible=False, size="sm") \
93
+
94
+ with gr.Column(visible=True):
95
+ gr.Markdown("<p style=font-size:1em;text-align:center;>A des fins de démonstrations, la base de connaissance est alimentée depuis Wikipedia</p>")
96
+ wiki_fetch_btn = gr.Button("Rechercher les pages Wikipedia", visible=True, size="sm")
97
+ wiki_list_comp = gr.CheckboxGroup(
98
+ label="Sélectionner les pages à ajouter dans la base de connaissance",
99
+ visible=False,
100
+ interactive=True,
101
+ )
102
+
103
+ with gr.Column():
104
+ wiki_add_to_db_btn = \
105
+ gr.Button("Ajouter les documents sélectionnés à la base de connaissance",
106
+ visible=False, size="sm")
107
+
108
+ # wiki_clear_btn = gr.Button("Effacer les choix de documents", visible=False, size="sm") \
109
+
110
+ # with gr.Tab("Depuis le disque local (en cours de développement)"):
111
+ # my_files_list_comp = gr.Files(
112
+ # label="Charger ses documents",
113
+ # visible=True,
114
+ # )
115
+ # my_files_add_to_db_btn = gr.Button("Add files to sources", visible=False, size="sm")
116
+
117
+ add_close_btn = gr.Button("Close", visible=False, size="sm")
118
+ with gr.Row():
119
+ db_add_doc_btn = gr.Button("Ajouter de nouveaux documents", visible=False, size="sm")\
120
+
121
+ output_files_comp = gr.Files(file_count="multiple", visible=False)
122
+
123
+ generate_btn = gr.Button("Générer", interactive=True)
124
+
125
+ clear_btn = gr.Button('Nettoyer', visible=False)
126
+ rerun_btn = gr.Button('Relancer', visible=False)
127
+
128
+
129
+ """
130
+ ===============================================
131
+ Verification requirements components
132
+ ======================
133
+ """
134
+
135
+ with gr.Accordion("Générer la réponse aux exigences (en cours de développement)", open=False, visible=True) as exigences_acc:
136
+ input_csv_comp = gr.File(file_count="single", file_types=[".csv", "xlsx"], visible=True, label="Fichiers d'exigences (csv, xlsx only)")
137
+ with gr.Row():
138
+ verif_btn = gr.Button("Générer la réponse aux exigences (en cours de développement)", visible=False)
139
+ output_csv_comp = gr.File(file_count="single", file_types=[".csv", "xlsx"], visible=False)
140
+
141
+ gr.Markdown("")
142
+ gr.Markdown("")
143
+ gr.Markdown("<p style=font-size:1em;>Vous êtes administrateur de GenProp</p>")
144
+
145
+ with gr.Accordion("Gérer les templates", open=False) as gestions_templates_acc:
146
+ templates_radio_modif = gr.Radio(
147
+ interactive=True,
148
+ label="Templates",
149
+ choices=config['templates'],
150
+ value=config['templates'][config['default_template_index']],
151
+ )
152
+ with gr.Row():
153
+ add_template_btn = gr.UploadButton("Ajouter un template",file_count="single", file_types=[".docx"])
154
+ delete_curr_template_btn = gr.Button("Supprimer le template sélectionné")
155
+ with gr.Accordion("Gérer la base de connaissances (en cours de développement)", open=False):
156
+ pass
157
+
158
+ with gr.Column():
159
+ pass
160
+
161
+ """
162
+ ===================================================
163
+ state variables
164
+ ===============
165
+ """
166
+ wiki_source_var: [str] = gr.State([]) # list of wikipage titles of interest for the input text tasks
167
+ wiki_db_var: [str] = gr.State([]) # list of wiki document titles in the db (as seen from the UI)
168
+ my_files_db_var: [str] = gr.State([]) # list of titles of the files uploaded in the db (as seen from the UI)
169
+ db_collection_var: str = gr.State("-1") # name of the collection of documents sources in the db # list of styles to modify
170
+
171
+ """
172
+ ===================================================
173
+ Input and styles functions and listeners
174
+ ========================================
175
+ """
176
+
177
+ def input_csv_fn(input_csv_):
178
+ if not input_csv_.name.endswith('.csv') and not input_csv_.name.endswith('.xlsx'):
179
+ raise gr.Error(f'File {input_csv_.name} is not a csv or xlsx file, please upload only csv or xlsx files')
180
+ else:
181
+ controller.set_input_csv(input_csv_)
182
+ update_ = {
183
+ verif_btn: gr.update(visible=True),
184
+ }
185
+ return update_
186
+
187
+
188
+ input_csv_comp.upload(input_csv_fn,
189
+ inputs=[input_csv_comp],
190
+ outputs=[verif_btn],
191
+ )
192
+
193
+ def input_files_upload_fn(input_files_):
194
+ for files in input_files_:
195
+ if(not files.name.endswith('.docx')):
196
+ raise gr.Error(f'File {files.name} is not a docx file, please upload only docx files')
197
+ else:
198
+ continue
199
+ controller.clear_docs()
200
+ controller.copy_docs(input_files_)
201
+ update_ = {
202
+ newstyles_acc: gr.update(open=True),
203
+ style_acc: gr.update(visible=True),
204
+ run_style_btn: gr.update(visible=True),
205
+ clear_style_btn: gr.update(visible=True),
206
+ list_acc: gr.update(open=True),
207
+ }
208
+ newstyles_update = newstyles_fn()
209
+ # misapplied_styles = misapplied_styles_fn()
210
+ # for val in misapplied_styles.values():
211
+ # if val > 0:
212
+ # doc = list(misapplied_styles.keys())[list(misapplied_styles.values()).index(val)]
213
+ # gr.Warning(f"{val} paragraphs were detected in the document {doc.name} because their styles are not well applied. Please review your document for better results.")
214
+ update_.update(newstyles_update)
215
+ return update_
216
+
217
+ input_files_comp.upload(input_files_upload_fn,
218
+ inputs=[input_files_comp],
219
+ outputs=[style_acc, newstyles_acc, run_style_btn, clear_style_btn, list_acc] + newstyle_comps + list_style_comps
220
+ )
221
+
222
+ def input_file_clear_fn():
223
+ controller.clear_docs()
224
+ update_ = {
225
+ options_btn: gr.update(value=[]),
226
+ log_comp: gr.update(value="", visible=False),
227
+ output_styles_files_comp: gr.update(value=[], visible=False),
228
+ newstyles_acc: gr.update(open=False),
229
+ style_acc: gr.update(open=False),
230
+ gen_acc: gr.update(open=False),
231
+ output_files_comp: gr.update(visible=False),
232
+ run_style_btn: gr.update(visible=False),
233
+ clear_style_btn: gr.update(visible=False),
234
+ list_acc: gr.update(open=False),
235
+ exigences_acc: gr.update(value=""),
236
+ }
237
+ newstyles_update_ = newstyles_reset()
238
+ list_style_update_ = newliststyle_reset()
239
+ update_.update(newstyles_update_)
240
+ update_.update(list_style_update_)
241
+ return update_
242
+
243
+ input_files_comp.clear(
244
+ input_file_clear_fn,
245
+ inputs=[],
246
+ outputs=[options_btn, output_styles_files_comp, output_files_comp, log_comp, newstyles_acc, list_acc,
247
+ gen_acc, style_acc, run_style_btn, clear_style_btn, exigences_acc] + newstyle_comps + list_style_comps
248
+ )
249
+
250
+ def misapplied_styles_fn():
251
+ res = controller.retrieve_number_of_misapplied_styles()
252
+ return res
253
+
254
+ def newstyles_fn():
255
+ update_ = {}
256
+ update_.update(newliststyle_reset())
257
+ update_.update(newstyles_reset())
258
+ different_styles, all_template_styles = controller.get_difference_with_template()
259
+ all_template_styles_names = [style.name for style in all_template_styles]
260
+ list_styles_to_update = controller.get_list_styles()
261
+ get_label_list = lambda i: f"style: {list_styles_to_update[i]['list_style']}"
262
+ list_style_update_ = {
263
+ list_style_comps[i]: gr.update(visible=i < len(list_styles_to_update),
264
+ choices=sorted(all_template_styles_names, key=lambda x: distance(x, list_styles_to_update[i]['list_style'])),
265
+ value=None,
266
+ label=get_label_list(i)) if i < len(list_styles_to_update) else ''
267
+ for i in range(config['max_styles'])
268
+ }
269
+ update_.update(list_style_update_)
270
+ #delete styles in different_styles that are already in list_styles_to_update
271
+ different_styles = delete_duplicate_styles(list_styles_to_update, different_styles)
272
+ adapted_template_styles = []
273
+ for i in range(len(different_styles)):
274
+ adapted_template_styles.append([style.name for style in all_template_styles if style.type == different_styles[i]['style'].type])
275
+ get_label = lambda i: f"style: {different_styles[i]['style'].name}"
276
+ newstyles_update_ = {
277
+ newstyle_comps[i]: gr.update(visible=i < len(different_styles),
278
+ #sort the styles using levenstein distance function
279
+ choices=sorted(adapted_template_styles[i], key=lambda x: distance(x, different_styles[i]['style'].name)),
280
+ value=None,
281
+ label=get_label(i)) if i < len(different_styles) else ''
282
+ for i in range(len(different_styles))
283
+ }
284
+ update_.update(newstyles_update_)
285
+ return update_
286
+
287
+ def newliststyle_reset():
288
+ update_ = {
289
+ list_style_comps[i]: gr.update(visible=False,
290
+ choices=[],
291
+ value=None,
292
+ label='')
293
+ for i in range(config['max_styles'])
294
+ }
295
+ return update_
296
+
297
+ def newstyles_reset():
298
+ update_ = {
299
+ newstyle_comps[i]: gr.update(visible=False,
300
+ choices=[],
301
+ value=None,
302
+ label='')
303
+ for i in range(config['max_styles'])
304
+ }
305
+ return update_
306
+
307
+ def templates_fn(templates_):
308
+ controller.set_template(templates_)
309
+ update_ = newstyles_fn()
310
+ return update_
311
+
312
+ templates_radio.change(templates_fn,
313
+ inputs=[templates_radio],
314
+ outputs=[newstyles_acc, list_acc] + newstyle_comps + list_style_comps)
315
+
316
+ def newstyle_fns(src_index: int):
317
+ def newstyle_fn(newstyle_):
318
+ controller.update_style(src_index, newstyle_)
319
+ return newstyle_fn
320
+
321
+ def change_list_style_fn(src_index: int):
322
+ def change_list_style_fn(list_style_):
323
+ controller.update_list_style(src_index, list_style_)
324
+ return change_list_style_fn
325
+
326
+ def add_template_fn(template):
327
+ controller.add_template(template)
328
+ update_ = {
329
+ templates_radio: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
330
+ templates_radio_modif: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
331
+ }
332
+ return update_
333
+
334
+ def delete_curr_template_fn(template):
335
+ controller.delete_curr_template(template)
336
+ update_ = {
337
+ templates_radio: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
338
+ templates_radio_modif: gr.update(choices=[t for t in os.listdir(config['templates_path']) if t.endswith((".docx"))]),
339
+ options_btn: gr.update(value=[]),
340
+ log_comp: gr.update(value="", visible=False),
341
+ output_styles_files_comp: gr.update(value=[], visible=False),
342
+ newstyles_acc: gr.update(open=False),
343
+ run_style_btn: gr.update(visible=True),
344
+ list_acc: gr.update(open=False),
345
+ }
346
+ return update_
347
+
348
+ add_template_btn.upload(add_template_fn,
349
+ inputs=[add_template_btn],
350
+ outputs=[templates_radio,templates_radio_modif])
351
+
352
+ delete_curr_template_btn.click(delete_curr_template_fn,
353
+ inputs=[templates_radio],
354
+ outputs=[templates_radio, options_btn, log_comp, output_styles_files_comp, newstyles_acc, run_style_btn, list_acc, templates_radio_modif])
355
+
356
+ for src_index, newstyle_comp in enumerate(newstyle_comps):
357
+ newstyle_comp.input(newstyle_fns(src_index), inputs=[newstyle_comp], outputs=[],show_progress="full")
358
+
359
+ for src_index, list_style_comp in enumerate(list_style_comps):
360
+ list_style_comp.input(change_list_style_fn(src_index), inputs=[list_style_comp], outputs=[],show_progress="full")
361
+
362
    def clear_style_fn(input_files_):
        """Reset the styling tab: re-seed the working documents and restore default widgets."""
        controller.clear_docs()
        if input_files_:
            controller.copy_docs(input_files_)
        controller.set_template()
        update_ = {
            options_btn: gr.update(value=[]),
            log_comp: gr.update(value="", visible=False),
            output_styles_files_comp: gr.update(value=[], visible=False),
            newstyles_acc: gr.update(open=False),
            run_style_btn: gr.update(visible=True),
            list_acc: gr.update(open=False),
            # NOTE(review): the visible config.py defines 'templates_path' but no
            # 'templates' list — confirm this key exists elsewhere or this raises KeyError.
            templates_radio: gr.update(value=config['templates'][config['default_template_index']]),
        }
        # Merge in the per-style widget resets computed by newstyles_fn().
        newstyles_update_ = newstyles_fn()
        update_.update(newstyles_update_)
        return update_
379
+
380
    # Clear button resets the whole styling tab, including every per-style widget.
    clear_style_btn.click(clear_style_fn,
                          inputs=[input_files_comp],
                          outputs=[options_btn, output_styles_files_comp, log_comp, newstyles_acc, list_acc, run_style_btn, templates_radio]
                          + newstyle_comps + list_style_comps
                          )
385
+
386
+ def run_style_fn(options_btn_):
387
+ print(f"options activated : {options_btn_}")
388
+ controller.apply_template(options_btn_)
389
+ log = controller.get_log()
390
+ new_docs_path = controller.generated_docs_path
391
+ output_paths = [f"{new_docs_path}/{f}" for f in os.listdir(new_docs_path)]
392
+ print(f"output_paths: {output_paths}")
393
+ update_ = {
394
+ log_comp: gr.update(value=log, visible=True),
395
+ output_styles_files_comp: gr.update(value=output_paths, visible=True),
396
+ run_style_btn: gr.update(visible=False),
397
+ }
398
+ return update_
399
+
400
+
401
    # NOTE(review): newstyle_comps are listed as outputs but run_style_fn's dict
    # never updates them (Gradio leaves unnamed outputs unchanged) — confirm intended.
    run_style_btn.click(run_style_fn,
                        inputs=[options_btn],
                        outputs=[log_comp, output_styles_files_comp, run_style_btn] + newstyle_comps, show_progress="full")
404
+
405
+ """
406
+ =====================================================
407
+ Generation functions
408
+ ====================
409
+ """
410
+
411
+ def generate_option_fn(db_collection_):
412
+ id_ = controller.get_or_create_collection(db_collection_)
413
+ update_ = {
414
+ db_collection_var: id_,
415
+ }
416
+ return update_
417
+
418
+ def wiki_fetch1_fn():
419
+ """
420
+ fetch the wikifiles interesting for solving the tasks as defined in the input doc
421
+ """
422
+ update_ = {
423
+ wiki_list_comp: gr.update(visible=True),
424
+ }
425
+ return update_
426
+
427
+ async def wiki_fetch2_fn():
428
+ """
429
+ fetch the wikifiles interesting for solving the tasks as defined in the input doc
430
+ """
431
+ wiki_interesting_files = await controller.wiki_fetch()
432
+ print(f"wiki_interesting_files: {wiki_interesting_files}")
433
+ wiki_files = wiki_interesting_files # [w for w in wiki_interesting_files if w not in wiki_db_files_]
434
+ update_ = {
435
+ wiki_list_comp: gr.update(visible=True, value=[], choices=wiki_files),
436
+ wiki_source_var: wiki_interesting_files,
437
+ wiki_add_to_db_btn: gr.update(visible=True),
438
+ # wiki_clear_btn: gr.update(visible=True), #Button to clear the choices that are by default all ticked
439
+ }
440
+ return update_
441
+
442
    async def wiki_add_to_db_fn(wiki_list_, wiki_source_, wiki_db_, db_list_, db_collection_):
        """
        Add the ticked wiki pages to the database source.

        Mutates the wiki_db_/db_list_ state lists in place, uploads each new page
        concurrently, then refreshes the choice lists and button visibility.
        """
        wiki_to_add = [wiki for wiki in wiki_list_ if wiki not in wiki_db_]
        db_list_ += wiki_to_add
        wiki_db_ += wiki_to_add
        wiki_source_remaining = [wiki for wiki in wiki_source_ if wiki not in wiki_db_]
        # TODO: move this upload fan-out into the controller.
        async_upload_and_store_tasks = [asyncio.create_task(controller.wiki_upload_and_store(wiki, db_collection_)) for wiki in wiki_to_add]
        await asyncio.gather(*async_upload_and_store_tasks)
        db_not_empty = 0 < len(db_list_)
        wiki_to_add_not_empty = 0 < len(wiki_source_remaining)
        update_ = {
            wiki_db_var: wiki_db_,
            wiki_list_comp: gr.update(value=[], choices=wiki_source_remaining),
            wiki_add_to_db_btn: gr.update(visible=wiki_to_add_not_empty),
            db_list_comp: gr.update(
                visible=True,
                value=db_list_,
                choices=db_list_,
                label="Database content"),
            db_reset_btn: gr.update(visible=db_not_empty),
            generate_btn: gr.update(visible=True, interactive=db_not_empty),
        }
        return update_
467
+
468
+ def generate_fn1():
469
+ update_ = {
470
+ output_files_comp: gr.update(visible=True)
471
+ }
472
+ return update_
473
+
474
+ async def generate_fn2(db_collection_, db_list_):
475
+ output_files = await controller.generate_doc_from_db(collection_name=db_collection_,
476
+ from_files=db_list_)
477
+ update_ = {
478
+ output_files_comp: gr.update(value=output_files, visible=True),
479
+ }
480
+ return update_
481
+
482
+
483
+ """
484
+ =====================================================
485
+ Generation listeners
486
+ ====================
487
+ """
488
+
489
    # Fetch runs in two phases: show the list immediately, then fill it async.
    wiki_fetch_btn \
        .click(wiki_fetch1_fn, inputs=[], outputs=[wiki_list_comp]) \
        .then(wiki_fetch2_fn,
              inputs=[],
              outputs=[wiki_list_comp, wiki_source_var, wiki_add_to_db_btn])

    # Ensure the collection exists before the async upload of ticked pages.
    wiki_add_to_db_btn\
        .click(generate_option_fn,
               inputs=[db_collection_var],
               outputs=[db_collection_var])\
        .then(wiki_add_to_db_fn,
              inputs=[wiki_list_comp, wiki_source_var, wiki_db_var, db_list_comp, db_collection_var],
              outputs=[db_list_comp, wiki_list_comp, wiki_db_var,
                       generate_btn, wiki_add_to_db_btn, db_reset_btn])

    # Generation also runs in two phases: reveal the output area, then fill it.
    generate_btn\
        .click(generate_fn1,
               inputs=[],
               outputs=[output_files_comp])\
        .then(generate_fn2,
              inputs=[db_collection_var, db_list_comp],
              outputs=[output_files_comp])
511
+
512
+
513
+ """
514
+ =====================================================
515
+ Clear and rerun functions and listeners
516
+ =======================================
517
+ """
518
+
519
+ def clear_fn():
520
+ update_ = {
521
+ input_files_comp: gr.update(value=None),
522
+ output_files_comp: gr.update(value=None, visible=False),
523
+ clear_btn: gr.update(visible=False),
524
+ rerun_btn: gr.update(visible=False),
525
+ }
526
+ return update_
527
+
528
    # Wire the clear button, then hand the assembled Blocks app back to the caller.
    clear_btn.click(clear_fn,
                    inputs=[],
                    outputs=[input_files_comp, output_files_comp, clear_btn, rerun_btn])

    # wiki_clear_btn.click(clear_choices_fn, inputs=[], outputs=[wiki_list_comp])  # listener for the clear button of the wiki choices
    return formatdoc
temp/generated_files/file.txt ADDED
File without changes