| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

from argparse import ArgumentParser
from contextlib import closing
from contextlib import contextmanager
from functools import partial
import base64
import gettext
import io
import logging
import mimetypes
import os.path
import re
import shutil
import sys
import tempfile

from . import __version__ as version
from .cli import init_logger
from .transforms import BaseTransform
from .utils import cached_property
|
|
|
|
# True when the interpreter's major version is 3.
PY3 = sys.version_info.major == 3
logger = logging.getLogger(__name__)

# Message catalogs are looked up in the 'locale' directory that sits next to
# this module; the path is made absolute so it does not depend on the cwd.
locale_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'locale')
)
# fallback=True: when no 'hwp5html' catalog is found, gettext returns the
# untranslated message instead of raising.
t = gettext.translation('hwp5html', locale_dir, fallback=True)
_ = t.gettext


# XSL stylesheet resource paths handed to make_xsl_transform() by
# HTMLTransform.
RESOURCE_PATH_XSL_CSS = 'xsl/hwp5css.xsl'
RESOURCE_PATH_XSL_XHTML = 'xsl/hwp5html.xsl'
|
|
|
|
class HTMLTransform(BaseTransform):
    """HWPv5-to-HTML transformations.

    The ``transform_*`` properties return callables built by
    ``make_transform_hwp5`` / ``make_xsl_transform``; those factories are not
    defined in this class (presumably inherited from ``BaseTransform`` --
    confirm there). The ``transform_*`` methods write their results to the
    filesystem.
    """

    @property
    def transform_hwp5_to_css(self):
        '''
        Callable that turns an hwp5 file into CSS.

        >>> T.transform_hwp5_to_css(hwp5file, 'styles.css')
        '''
        transform_xhwp5 = self.transform_xhwp5_to_css
        return self.make_transform_hwp5(transform_xhwp5)

    @property
    def transform_hwp5_to_xhtml(self):
        '''
        Callable that turns an hwp5 file into XHTML.

        >>> T.transform_hwp5_to_xhtml(hwp5file, 'index.xhtml')
        '''
        transform_xhwp5 = self.transform_xhwp5_to_xhtml
        return self.make_transform_hwp5(transform_xhwp5)

    def transform_hwp5_to_dir(self, hwp5file, outdir):
        '''
        Write ``index.xhtml``, ``styles.css`` and (when the document has a
        ``BinData`` entry) a ``bindata`` subdirectory into ``outdir``.

        >>> T.transform_hwp5_to_dir(hwp5file, 'output')
        '''
        with self.transformed_xhwp5_at_temp(hwp5file) as xhwp5path:
            self.transform_xhwp5_to_dir(xhwp5path, outdir)

        bindata_dir = os.path.join(outdir, 'bindata')
        self.extract_bindata_dir(hwp5file, bindata_dir)

    @cached_property
    def transform_xhwp5_to_css(self):
        '''
        XSL transform built from ``RESOURCE_PATH_XSL_CSS``.

        >>> T.transform_xhwp5_to_css('hwp5.xml', 'styles.css')
        '''
        resource_path = RESOURCE_PATH_XSL_CSS
        return self.make_xsl_transform(resource_path)

    @cached_property
    def transform_xhwp5_to_xhtml(self):
        '''
        XSL transform built from ``RESOURCE_PATH_XSL_XHTML``.

        >>> T.transform_xhwp5_to_xhtml('hwp5.xml', 'index.xhtml')
        '''
        resource_path = RESOURCE_PATH_XSL_XHTML
        return self.make_xsl_transform(resource_path)

    def transform_xhwp5_to_dir(self, xhwp5path, outdir):
        '''
        Run both XSL transforms on ``xhwp5path`` and write the results to
        ``outdir/index.xhtml`` and ``outdir/styles.css`` (opened in binary
        mode).

        >>> T.transform_xhwp5_to_dir('hwp5.xml', 'output')
        '''
        html_path = os.path.join(outdir, 'index.xhtml')
        with io.open(html_path, 'wb') as f:
            self.transform_xhwp5_to_xhtml(xhwp5path, f)

        css_path = os.path.join(outdir, 'styles.css')
        with io.open(css_path, 'wb') as f:
            self.transform_xhwp5_to_css(xhwp5path, f)

    def transform_hwp5_to_single(self, hwp5file, outpath):
        """
        Convert HWP file to a single HTML file with embedded CSS and images.

        The regular directory output is generated in a temporary directory,
        then the stylesheet is inlined as a ``<style>`` element and every
        ``src="bindata/..."`` reference is replaced with a base64 ``data:``
        URI. The result is written to ``outpath`` as UTF-8 text.

        :param hwp5file: document object accepted by
            ``transform_hwp5_to_dir``
        :param outpath: filesystem path of the HTML file to write
        :raises RuntimeError: if no ``index.xhtml`` was produced
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            self.transform_hwp5_to_dir(hwp5file, temp_dir)

            html_path = os.path.join(temp_dir, 'index.xhtml')
            css_path = os.path.join(temp_dir, 'styles.css')
            bindata_dir = os.path.join(temp_dir, 'bindata')

            if not os.path.exists(html_path):
                raise RuntimeError("HTML generation failed")
            with io.open(html_path, 'r', encoding='utf-8') as f:
                html_content = f.read()

            css_content = ""
            if os.path.exists(css_path):
                with io.open(css_path, 'r', encoding='utf-8') as f:
                    css_content = f.read()

            # Everything needed from the temporary directory is folded into
            # html_content before the directory is removed.
            html_content = self._embed_stylesheet(html_content, css_content)
            html_content = self._embed_images(html_content, bindata_dir)

        with io.open(outpath, 'w', encoding='utf-8') as f:
            f.write(html_content)

    @staticmethod
    def _embed_stylesheet(html_content, css_content):
        """Replace the ``styles.css`` link with an inline ``<style>`` block."""
        # Remove the external reference first so the pattern is never applied
        # to the CSS text injected below; the link would dangle in a
        # single-file document anyway, so it is dropped even without CSS.
        html_content = re.sub(
            r'<link[^>]+href="styles\.css"[^>]*/>', '', html_content
        )
        if css_content:
            style_tag = '<style>\n{}\n</style>\n'.format(css_content)
            # count=1: only the document's own closing head tag is a valid
            # insertion point.
            html_content = html_content.replace(
                '</head>', style_tag + '</head>', 1
            )
        return html_content

    @staticmethod
    def _embed_images(html_content, bindata_dir):
        """Inline ``src="bindata/..."`` references as base64 data URIs."""
        if not os.path.isdir(bindata_dir):
            return html_content

        def replace_image(match):
            # Only the file name is used, so the lookup cannot leave
            # bindata_dir.
            image_filename = os.path.basename(match.group(1))
            image_path = os.path.join(bindata_dir, image_filename)
            if not os.path.isfile(image_path):
                # Leave references to missing files untouched.
                return match.group(0)

            # Fall back to PNG when the extension gives no hint.
            mime_type = mimetypes.guess_type(image_path)[0] or 'image/png'
            with open(image_path, 'rb') as img_f:
                b64_data = base64.b64encode(img_f.read()).decode('ascii')
            return 'src="data:{};base64,{}"'.format(mime_type, b64_data)

        return re.sub(r'src="(bindata/[^"]+)"', replace_image, html_content)

    def extract_bindata_dir(self, hwp5file, bindata_dir):
        """Unpack the document's ``BinData`` entry into ``bindata_dir``.

        Does nothing when ``hwp5file`` has no ``BinData`` entry; creates
        ``bindata_dir`` when it does not exist yet.
        """
        if 'BinData' not in hwp5file:
            return
        bindata_stg = hwp5file['BinData']
        if not os.path.exists(bindata_dir):
            os.mkdir(bindata_dir)

        from hwp5.storage import unpack
        unpack(bindata_stg, bindata_dir)
|
|
|
|
def main():
    """Command-line entry point of ``hwp5html``.

    Parses the arguments, picks the transform and destination for the
    selected mode (``--css``, ``--html``, ``--embed-image`` or, by default,
    directory output) and runs the conversion. Any exception is reported
    with a traceback on stderr and an error log entry, and the process exits
    with status 1.
    """
    import traceback

    from .utils import make_open_dest_file
    from .xmlmodel import Hwp5File

    argparser = main_argparser()
    args = argparser.parse_args()
    init_logger(args)

    hwp5path = args.hwp5file
    # Default output name: the input file name without its extension.
    default_stem = os.path.splitext(os.path.basename(hwp5path))[0]

    html_transform = HTMLTransform()

    open_dest = make_open_dest_file(args.output)
    if args.css:
        transform = html_transform.transform_hwp5_to_css
        open_dest = wrap_for_css(open_dest)
    elif args.html:
        transform = html_transform.transform_hwp5_to_xhtml
        open_dest = wrap_for_xml(open_dest)
    elif args.embed_image:
        transform = html_transform.transform_hwp5_to_single
        single_path = args.output or default_stem + '.html'

        # transform_hwp5_to_single opens its destination itself, so it must
        # receive the path rather than an already opened file object.
        @contextmanager
        def open_dest():
            yield single_path
    else:
        transform = html_transform.transform_hwp5_to_dir
        dest_path = args.output or default_stem
        open_dest = partial(open_dir, dest_path)

    # Diagnostics go through the logger: stdout may carry the generated
    # CSS/XHTML when no --output is given.
    logger.debug('Input file: %s', hwp5path)
    logger.debug('Args: css=%s, html=%s, embed_image=%s',
                 args.css, args.html, args.embed_image)

    try:
        with closing(Hwp5File(hwp5path)) as hwp5file:
            with open_dest() as dest:
                logger.debug('Starting transformation using %s', transform)
                transform(hwp5file, dest)
                logger.debug('Transformation finished')
    except Exception as e:
        # Top-level boundary: report and turn any failure into exit status 1.
        traceback.print_exc()
        logger.error('%s', e)
        sys.exit(1)
|
|
|
|
def main_argparser():
    """Build the argument parser for the ``hwp5html`` command.

    Options: ``--version``, ``--loglevel``, ``--logfile``, ``--output`` and
    the positional ``hwp5file``. ``--css``, ``--html`` and ``--embed-image``
    are mutually exclusive.

    :returns: the configured ``ArgumentParser``
    """
    parser = ArgumentParser(
        prog='hwp5html',
        description=_('HWPv5 to HTML converter'),
    )
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s {}'.format(version),
    )
    parser.add_argument(
        '--loglevel',
        help=_('Set log level.'),
    )
    parser.add_argument(
        '--logfile',
        help=_('Set log file.'),
    )
    parser.add_argument(
        '--output',
        help=_('Output file'),
    )
    parser.add_argument(
        'hwp5file',
        metavar='<hwp5file>',
        help=_('.hwp file to convert'),
    )

    # At most one generator mode may be given on the command line.
    generator_group = parser.add_mutually_exclusive_group()
    generator_group.add_argument(
        '--css',
        action='store_true',
        help=_('Generate CSS'),
    )
    generator_group.add_argument(
        '--html',
        action='store_true',
        help=_('Generate HTML'),
    )
    generator_group.add_argument(
        '--embed-image',
        action='store_true',
        help=_('Embed images and CSS into a single HTML file'),
    )
    return parser
|
|
|
|
@contextmanager
def open_dir(path):
    """Context manager yielding ``path`` as a freshly created directory.

    Anything already at ``path`` is removed with ``shutil.rmtree`` first.
    Missing parent directories are created as well. Nothing is cleaned up on
    exit.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    # makedirs rather than mkdir: an output path whose parents do not exist
    # yet should not make the conversion fail.
    os.makedirs(path)
    yield path
|
|
|
|
def wrap_for_css(open_dest):
    """Wrap ``open_dest`` with a pager and a ``text/css`` syntax highlighter.

    The wrapping is delegated to ``utils.wrap_open_dest_for_tty``
    (presumably it only takes effect when the destination is a terminal --
    confirm in ``utils``).
    """
    from .utils import wrap_open_dest_for_tty
    from .utils import pager
    from .utils import syntaxhighlight

    filters = [
        pager(),
        syntaxhighlight('text/css'),
    ]
    return wrap_open_dest_for_tty(open_dest, filters)
|
|
|
|
def wrap_for_xml(open_dest):
    """Wrap ``open_dest`` with a pager, an ``application/xml`` syntax
    highlighter and an ``xmllint`` formatter.

    The wrapping is delegated to ``utils.wrap_open_dest_for_tty``
    (presumably it only takes effect when the destination is a terminal --
    confirm in ``utils``).
    """
    from .utils import wrap_open_dest_for_tty
    from .utils import pager
    from .utils import syntaxhighlight
    from .utils import xmllint

    filters = [
        pager(),
        syntaxhighlight('application/xml'),
        xmllint(format=True, nonet=True),
    ]
    return wrap_open_dest_for_tty(open_dest, filters)
|
|
|
|
# Run the converter when this module is executed as a script.
if __name__ == '__main__':
    main()
|
|