File size: 1,724 Bytes
7263d32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import re
from datetime import datetime

import jinja2


class XMLHelper:
    """Render an XML document from a Jinja2 template and prepare its input data.

    The helper loads ``self.template`` from the directory ``self.searchpath``,
    renders it with a caller-supplied mapping, and post-processes the result
    so that bracketed coordinate pairs such as ``[12, 34]`` become ``12,34``.
    """

    # Matches a bracketed pair "[a, b]" (optional whitespace around both
    # items) and captures the two items. Compiled once for all renders.
    _COORD_PAIR = re.compile(r"\[\s*([^\s,]+)\s*,\s*([^\s\]]+)\s*\]")

    def __init__(
        self,
        xml_file_name="page_xml.xml",
        searchpath="./src/htr_pipeline/utils/templates",
        template="page_xml_2013.xml",
    ):
        """Store the output file name and the template location.

        Args:
            xml_file_name: Name kept on the instance as ``xml_file_name``;
                it is not read by any method of this class.
            searchpath: Directory handed to ``jinja2.FileSystemLoader``.
                NOTE: the default is a relative path, so it is resolved
                against the current working directory of the process.
            template: File name of the template inside ``searchpath``.
        """
        self.xml_file_name = xml_file_name
        self.searchpath = searchpath
        self.template = template

    def render(self, template_data):
        """Return the rendered XML string for ``template_data``.

        Args:
            template_data: Mapping passed to the template's ``render`` call.

        Returns:
            The rendered text with coordinate pairs rewritten (see
            ``_transform_coords``).
        """
        return self._render_xml(template_data)

    def _transform_coords(self, input_string):
        """Rewrite every ``[a, b]`` pair in ``input_string`` as ``a,b``.

        The substitution runs over the whole string, so any bracketed pair
        is rewritten wherever it occurs, not only inside coordinate
        attributes.
        """
        return self._COORD_PAIR.sub(r"\1,\2", input_string)

    def _render_xml(self, template_data):
        """Load the template, render it, and normalise coordinate pairs.

        The Jinja2 environment is created without autoescaping, so text
        values placed in ``template_data`` must already be XML-safe (see
        ``escape_xml_chars``).
        """
        template_loader = jinja2.FileSystemLoader(searchpath=self.searchpath)
        template_env = jinja2.Environment(loader=template_loader, trim_blocks=True)
        template = template_env.get_template(self.template)
        rendered_xml = template.render(template_data)
        return self._transform_coords(rendered_xml)

    def prepare_template_data(self, img_file_name, image):
        """Build the base mapping for the template from an image.

        Args:
            img_file_name: Value stored under ``"imageFilename"``.
            image: Object exposing ``shape``; ``shape[0]`` is used as the
                height and ``shape[1]`` as the width (presumably a NumPy
                image array -- confirm against callers).

        Returns:
            A dict with the keys ``created`` (local time formatted as
            ``YYYY-MM-DD, HH:MM:SS``), ``imageFilename``, ``imageWidth``,
            ``imageHeight`` and an empty ``textRegions`` list.
        """
        img_height = image.shape[0]
        img_width = image.shape[1]

        date_time = datetime.now().strftime("%Y-%m-%d, %H:%M:%S")
        return {
            "created": date_time,
            "imageFilename": img_file_name,
            "imageWidth": img_width,
            "imageHeight": img_height,
            "textRegions": [],
        }

    def escape_xml_chars(self, textline):
        """Return ``textline`` with the five XML special characters escaped.

        ``&``, ``<``, ``>``, ``'`` and ``"`` are replaced by ``&amp;``,
        ``&lt;``, ``&gt;``, ``&apos;`` and ``&quot;``. Input that already
        contains entities is escaped again (``&lt;`` becomes ``&amp;lt;``).
        """
        # The ampersand must be handled first; otherwise the "&" introduced
        # by the later replacements would itself be escaped.
        return (
            textline.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("'", "&apos;")
            .replace('"', "&quot;")
        )


# Script entry point: intentionally empty, so running this module directly
# performs no action.
if __name__ == "__main__":
    pass