| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | from __future__ import absolute_import |
| | from __future__ import print_function |
| | from __future__ import unicode_literals |
| | from itertools import islice |
| | import json |
| | import struct |
| |
|
| | from . import dataio |
| | from . import filestructure |
| | from .dataio import dumpbytes |
| | from .dataio import Eof |
| | from .dataio import UINT32 |
| | from .tagids import HWPTAG_BEGIN |
| | from .tagids import tagnames |
| | from .utils import JsonObjects |
| |
|
| |
|
def tagname(tagid):
    ''' Return the symbolic name of a record tag id.

    Ids registered in `tagnames` map to their registered name; any other id
    is rendered as 'HWPTAG<n>', where <n> is the offset of the id from
    HWPTAG_BEGIN.
    '''
    # The fallback is formatted up front, exactly like a `dict.get` default.
    fallback = 'HWPTAG%d' % (tagid - HWPTAG_BEGIN)
    return tagnames.get(tagid, fallback)
| |
|
| |
|
def Record(tagid, level, payload, size=None, seqno=None):
    ''' Build the dict that represents one record.

    :param tagid: tag id of the record
    :param level: nesting level of the record
    :param payload: payload of the record
    :param size: payload size; defaults to len(payload) when None
    :param seqno: optional sequence number; the 'seqno' key is only present
        when this is not None
    :returns: a dict with the keys tagid, tagname, level, size, payload
        (and seqno, when given)
    '''
    actual_size = len(payload) if size is None else size
    record = dict(tagid=tagid, tagname=tagname(tagid), level=level,
                  size=actual_size, payload=payload)
    if seqno is not None:
        record.update(seqno=seqno)
    return record
| |
|
| |
|
def decode_record_header(f):
    ''' Read one record header from `f`.

    The header is a single UINT32 word: bits 0-9 hold the tag id, bits 10-19
    the level and bits 20-31 the size.  A size field of 0xfff means the real
    size follows as a second UINT32.

    :returns: a (tagid, level, size) tuple, or None when Eof is raised while
        reading either word
    '''
    try:
        word = UINT32.read(f)
        size = (word >> 20) & 0xfff
        if size == 0xfff:
            # escape value: the actual size is stored in the next word
            size = UINT32.read(f)
    except Eof:
        return None
    return (word & 0x3ff, (word >> 10) & 0x3ff, size)
| |
|
| |
|
def encode_record_header(rec):
    ''' Pack the header of record `rec` into little-endian bytes.

    The size is always taken from len(rec['payload']).  Sizes below 0xfff
    are stored directly in the top 12 bits of a single 32-bit word; larger
    sizes store 0xfff there and append the real size as a second 32-bit
    word.

    :param rec: mapping with the keys 'payload', 'level' and 'tagid'
    :returns: 4 or 8 bytes of encoded header
    '''
    payload_size = len(rec['payload'])
    level_and_tag = (rec['level'] << 10) | rec['tagid']
    if payload_size >= 0xfff:
        # does not fit the 12-bit field: emit the escape value + real size
        return struct.pack('<II', (0xfff << 20) | level_and_tag,
                           payload_size)
    return struct.pack('<I', (payload_size << 20) | level_and_tag)
| |
|
| |
|
def read_record(f, seqno):
    ''' Read one complete record (header and payload) from `f`.

    :param f: stream to read from
    :param seqno: sequence number to store in the resulting record
    :returns: a record dict built by Record(), or None when no header could
        be decoded
    '''
    decoded = decode_record_header(f)
    if decoded is not None:
        tagid, level, size = decoded
        return Record(tagid, level, dataio.readn(f, size), size, seqno)
    return None
| |
|
| |
|
def dump_record(f, record):
    ''' Write `record` to `f`: the encoded header first, then the payload.

    :param f: object with a write() method
    :param record: mapping accepted by encode_record_header(), whose
        'payload' value is written as-is
    '''
    f.write(encode_record_header(record))
    f.write(record['payload'])
| |
|
| |
|
def read_records(f):
    ''' Generate the records read from `f`, numbered from 0 upwards.

    Iteration stops at the first read_record() call that yields no record.
    '''
    seqno = 0
    record = read_record(f, seqno)
    while record:
        yield record
        seqno += 1
        record = read_record(f, seqno)
| |
|
| |
|
def link_records(records):
    ''' Annotate each record with links to the record yielded just before it.

    Compared with its immediate predecessor:

    - same level: 'sister' is set to the predecessor and 'parent' to the
      predecessor's parent (None when it has none);
    - one level deeper: 'parent' is set to the predecessor;
    - anything else: no link is added.

    The records are modified in place and yielded in their original order.
    '''
    previous = None
    for current in records:
        if previous is not None:
            depth_change = current['level'] - previous['level']
            if depth_change == 0:
                current['sister'] = previous
                current['parent'] = previous.get('parent')
            elif depth_change == 1:
                current['parent'] = previous
        yield current
        previous = current
| |
|
| |
|
def record_to_json(record, *args, **kwargs):
    ''' Convert a record to a JSON string.

    The payload is replaced by list(dumpbytes(payload)) in the serialized
    form only: a shallow copy is serialized, so the caller's record keeps
    its original payload and can safely be converted more than once.

    :param record: record dict with a 'payload' key
    :param args: extra positional arguments passed on to json.dumps()
    :param kwargs: extra keyword arguments passed on to json.dumps()
    :returns: the JSON text produced by json.dumps()
    '''
    # dict(record, payload=...) keeps the key order of `record` and only
    # swaps the payload value, without touching the input mapping.
    serializable = dict(record, payload=list(dumpbytes(record['payload'])))
    return json.dumps(serializable, *args, **kwargs)
| |
|
| |
|
def nth(iterable, n, default=None):
    ''' Return the item at position `n` (0-based) of `iterable`.

    `default` is returned when the iterable has fewer than n + 1 items.
    An iterator passed in is advanced past the returned item.
    '''
    return next(islice(iterable, n, None), default)
| |
|
| |
|
def group_records_by_toplevel(records, group_as_list=True):
    ''' Group records by top-level trees and return iterable of the groups.

    A new group starts at the first record and at every later record whose
    'level' is 0; each group holds that record and the records following it
    up to the next level-0 record.

    :param records: any iterable of records (mappings with a 'level' key);
        it is consumed lazily
    :param group_as_list: when true (the default) each group is a list;
        otherwise each group is a generator sharing the underlying iterator.
        Like itertools.groupby(), such a lazy group is only usable until the
        next group is requested; whatever is left of it is skipped then.
    :returns: a generator of groups
    '''
    # Accept plain sequences as well as iterators: next() needs an iterator.
    records = iter(records)
    context = dict()

    try:
        context['top'] = next(records)
    except StopIteration:
        return

    def records_in_a_tree():
        yield context.pop('top')

        for record in records:
            if record['level'] == 0:
                # Start of the next tree: stash it for the outer loop.
                context['top'] = record
                return
            yield record

    while 'top' in context:
        group = records_in_a_tree()
        if group_as_list:
            yield list(group)
        else:
            yield group
            # The consumer may have left the group untouched or only partly
            # read.  Drain it so the pending top record is popped and the
            # next one (if any) is found; otherwise the loop would either
            # hand out the same group forever or stop too early.
            for _ in group:
                pass
| |
|
| |
|
class RecordStream(filestructure.VersionSensitiveItem):
    ''' A stream item whose content is read as a sequence of records. '''

    def records(self, **kwargs):
        ''' Return the records of this stream.

        Optional keyword arguments (checked in this order, only the first
        one present is applied):

        - range: a (start, stop) pair; only records[start:stop] are produced
        - treegroup: index n; the list of records of the n-th top-level
          tree is returned, or None when there is no such tree

        Without either keyword, a generator over all records is returned.
        '''
        stream = read_records(self.open())
        if 'range' in kwargs:
            bounds = kwargs['range']
            return islice(stream, bounds[0], bounds[1])
        if 'treegroup' in kwargs:
            trees = group_records_by_toplevel(stream, group_as_list=True)
            return nth(trees, kwargs['treegroup'])
        return stream

    def record(self, idx):
        ''' Return the record at position `idx`, or None if out of range. '''
        return nth(self.records(), idx)

    def records_json(self, **kwargs):
        ''' Wrap the records selected by `kwargs` (see records()) in a
        JsonObjects that converts each one with record_to_json(). '''
        selected = self.records(**kwargs)
        return JsonObjects(selected, record_to_json)

    def records_treegrouped(self, group_as_list=True):
        ''' Return an iterable of the records grouped by top-level tree;
        see group_records_by_toplevel() for the meaning of `group_as_list`.
        '''
        return group_records_by_toplevel(self.records(), group_as_list)

    def records_treegroup(self, n):
        ''' Return the list of records in the `n`-th top-level tree, or None
        when there are fewer than n + 1 trees. '''
        return nth(self.records_treegrouped(), n)

    def other_formats(self):
        ''' Map the '.records' extension to the `open` callable of the
        JSON view of this stream. '''
        return {'.records': self.records_json().open}
| |
|
| |
|
class Sections(filestructure.Sections):
    ''' Sections container whose section class is RecordStream. '''

    section_class = RecordStream
| |
|
| |
|
class Hwp5File(filestructure.Hwp5File):
    ''' Hwp5File for the 'rec' (record) layer.

    The docinfo class is RecordStream and the bodytext class is Sections.
    '''

    docinfo_class = RecordStream
    bodytext_class = Sections
| |
|