/** * @author jdiaz5513 */ import { PACK_SPAN_THRESHOLD } from "../constants"; import { MSG_PACK_NOT_WORD_ALIGNED } from "../errors"; /** * When packing a message there are two tags that are interpreted in a special way: `0x00` and `0xff`. * * @enum {number} */ const enum PackedTag { /** * The tag is followed by a single byte which indicates a count of consecutive zero-valued words, minus 1. E.g. if the * tag 0x00 is followed by 0x05, the sequence unpacks to 6 words of zero. * * Or, put another way: the tag is first decoded as if it were not special. Since none of the bits are set, it is * followed by no bytes and expands to a word full of zeros. After that, the next byte is interpreted as a count of * additional words that are also all-zero. */ ZERO = 0x00, /** * The tag is followed by the bytes of the word (as if it weren’t special), but after those bytes is another byte with * value N. Following that byte is N unpacked words that should be copied directly. * * These unpacked words may contain zeroes; in this implementation a minimum of PACK_SPAN_THRESHOLD zero bytes are * written before ending the span. * * The purpose of this rule is to minimize the impact of packing on data that doesn’t contain any zeros – in * particular, long text blobs. Because of this rule, the worst-case space overhead of packing is 2 bytes per 2 KiB of * input (256 words = 2KiB). */ SPAN = 0xff, } /** * Compute the Hamming weight (number of bits set to 1) of a number. Used to figure out how many bytes follow a tag byte * while computing the size of a packed message. * * WARNING: Using this with floating point numbers will void your warranty. * * @param {number} x A real integer. * @returns {number} The hamming weight (integer). */ export function getHammingWeight(x: number): number { // Thanks, HACKMEM! let w = x - ((x >> 1) & 0x55555555); w = (w & 0x33333333) + ((w >> 2) & 0x33333333); return (((w + (w >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; } export type byte = number; /** * Compute the tag byte from the 8 bytes of a 64-bit word. * * @param {byte} a The first byte. * @param {byte} b The second byte. * @param {byte} c The third byte. * @param {byte} d The fourth byte. * @param {byte} e The fifth byte. * @param {byte} f The sixth byte. * @param {byte} g The seventh byte. * @param {byte} h The eighth byte (phew!). * @returns {number} The tag byte. */ export function getTagByte(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number { // Yes, it's pretty. Don't touch it. return ( (a === 0 ? 0 : 0b00000001) | (b === 0 ? 0 : 0b00000010) | (c === 0 ? 0 : 0b00000100) | (d === 0 ? 0 : 0b00001000) | (e === 0 ? 0 : 0b00010000) | (f === 0 ? 0 : 0b00100000) | (g === 0 ? 0 : 0b01000000) | (h === 0 ? 0 : 0b10000000) ); } /** * Efficiently calculate the length of a packed Cap'n Proto message. * * @export * @param {ArrayBuffer} packed The packed message. * @returns {number} The length of the unpacked message in bytes. */ export function getUnpackedByteLength(packed: ArrayBuffer): number { const p = new Uint8Array(packed); let wordLength = 0; let lastTag = 0x77; for (let i = 0; i < p.byteLength; ) { const tag = p[i]; if (lastTag === PackedTag.ZERO) { wordLength += tag; i++; lastTag = 0x77; } else if (lastTag === PackedTag.SPAN) { wordLength += tag; i += tag * 8 + 1; lastTag = 0x77; } else { wordLength++; i += getHammingWeight(tag) + 1; lastTag = tag; } } return wordLength * 8; } /** * Compute the number of zero bytes that occur in a given 64-bit word, provided as eight separate bytes. * * @param {byte} a The first byte. * @param {byte} b The second byte. * @param {byte} c The third byte. * @param {byte} d The fourth byte. * @param {byte} e The fifth byte. * @param {byte} f The sixth byte. * @param {byte} g The seventh byte. * @param {byte} h The eighth byte (phew!). * @returns {number} The number of these bytes that are zero. */ export function getZeroByteCount(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number { return ( (a === 0 ? 1 : 0) + (b === 0 ? 1 : 0) + (c === 0 ? 1 : 0) + (d === 0 ? 1 : 0) + (e === 0 ? 1 : 0) + (f === 0 ? 1 : 0) + (g === 0 ? 1 : 0) + (h === 0 ? 1 : 0) ); } /** * Pack a section of a Cap'n Proto message into a compressed format. This will efficiently compress zero bytes (which * are common in idiomatic Cap'n Proto messages) into a compact form. * * For stream-framed messages this is called once for the frame header and once again for each segment in the message. * * The returned array buffer is trimmed to the exact size of the packed message with a single copy operation at the end. * This should be decent on CPU time but does require quite a lot of memory (a normal array is filled up with each * packed byte until the packing is complete). * * @export * @param {ArrayBuffer} unpacked The message to pack. * @param {number} [byteOffset] Starting byte offset to read bytes from, defaults to 0. * @param {number} [byteLength] Total number of bytes to read, defaults to the remainder of the buffer contents. * @returns {ArrayBuffer} A packed version of the message. */ export function pack(unpacked: ArrayBuffer, byteOffset = 0, byteLength?: number): ArrayBuffer { if (unpacked.byteLength % 8 !== 0) throw new Error(MSG_PACK_NOT_WORD_ALIGNED); const src = new Uint8Array(unpacked, byteOffset, byteLength); // TODO: Maybe we should do this with buffers? This costs more than 8x the final compressed size in temporary RAM. const dst: number[] = []; /* Just have to be sure it's neither ZERO nor SPAN. */ let lastTag = 0x77; /** This is where we need to remember to write the SPAN tag (0xff). */ let spanTagOffset = NaN; /** How many words have been copied during the current span. */ let spanWordLength = 0; /** * When this hits zero, we've had PACK_SPAN_THRESHOLD zero bytes pass by and it's time to bail from the span. */ let spanThreshold = PACK_SPAN_THRESHOLD; for (let srcByteOffset = 0; srcByteOffset < src.byteLength; srcByteOffset += 8) { /** Read in the entire word. Yes, this feels silly but it's fast! */ const a = src[srcByteOffset]; const b = src[srcByteOffset + 1]; const c = src[srcByteOffset + 2]; const d = src[srcByteOffset + 3]; const e = src[srcByteOffset + 4]; const f = src[srcByteOffset + 5]; const g = src[srcByteOffset + 6]; const h = src[srcByteOffset + 7]; const tag = getTagByte(a, b, c, d, e, f, g, h); /** If this is true we'll skip the normal word write logic after the switch statement. */ let skipWriteWord = true; switch (lastTag) { case PackedTag.ZERO: // We're writing a span of words with all zeroes in them. See if we need to bail out of the fast path. if (tag !== PackedTag.ZERO || spanWordLength >= 0xff) { // There's a bit in there or we got too many zeroes. Damn, we need to bail. dst.push(spanWordLength); spanWordLength = 0; skipWriteWord = false; } else { // Kay, let's quickly inc this and go. spanWordLength++; } break; case PackedTag.SPAN: { // We're writing a span of nonzero words. const zeroCount = getZeroByteCount(a, b, c, d, e, f, g, h); // See if we need to bail now. spanThreshold -= zeroCount; if (spanThreshold <= 0 || spanWordLength >= 0xff) { // Alright, time to get packing again. Write the number of words we skipped to the beginning of the span. dst[spanTagOffset] = spanWordLength; spanWordLength = 0; spanThreshold = PACK_SPAN_THRESHOLD; // We have to write this word normally. skipWriteWord = false; } else { // Just write this word verbatim. dst.push(a, b, c, d, e, f, g, h); spanWordLength++; } break; } default: // Didn't get a special tag last time, let's write this as normal. skipWriteWord = false; break; } // A goto is fast, idk why people keep hatin'. if (skipWriteWord) continue; dst.push(tag); lastTag = tag; if (a !== 0) dst.push(a); if (b !== 0) dst.push(b); if (c !== 0) dst.push(c); if (d !== 0) dst.push(d); if (e !== 0) dst.push(e); if (f !== 0) dst.push(f); if (g !== 0) dst.push(g); if (h !== 0) dst.push(h); // Record the span tag offset if needed, making sure to actually leave room for it. if (tag === PackedTag.SPAN) { spanTagOffset = dst.length; dst.push(0); } } // We're done. If we were writing a span let's finish it. if (lastTag === PackedTag.ZERO) { dst.push(spanWordLength); } else if (lastTag === PackedTag.SPAN) { dst[spanTagOffset] = spanWordLength; } return new Uint8Array(dst).buffer; } /** * Unpack a compressed Cap'n Proto message into a new ArrayBuffer. * * Unlike the `pack` function, this is able to efficiently determine the exact size needed for the output buffer and * runs considerably more efficiently. * * @export * @param {ArrayBuffer} packed An array buffer containing the packed message. * @returns {ArrayBuffer} The unpacked message. */ export function unpack(packed: ArrayBuffer): ArrayBuffer { // We have no choice but to read the packed buffer one byte at a time. const src = new Uint8Array(packed); const dst = new Uint8Array(new ArrayBuffer(getUnpackedByteLength(packed))); /** The last tag byte that we've seen - it starts at a "neutral" value. */ let lastTag = 0x77; for (let srcByteOffset = 0, dstByteOffset = 0; srcByteOffset < src.byteLength; ) { const tag = src[srcByteOffset]; if (lastTag === PackedTag.ZERO) { // We have a span of zeroes. New array buffers are guaranteed to be initialized to zero so we just seek ahead. dstByteOffset += tag * 8; srcByteOffset++; lastTag = 0x77; } else if (lastTag === PackedTag.SPAN) { // We have a span of unpacked bytes. Copy them verbatim from the source buffer. const spanByteLength = tag * 8; dst.set(src.subarray(srcByteOffset + 1, srcByteOffset + 1 + spanByteLength), dstByteOffset); dstByteOffset += spanByteLength; srcByteOffset += 1 + spanByteLength; lastTag = 0x77; } else { // Okay, a normal tag. Let's read past the tag and copy bytes that have a bit set in the tag. srcByteOffset++; for (let i = 1; i <= 0b10000000; i <<= 1) { // We only need to actually touch `dst` if there's a nonzero byte (it's already initialized to zeroes). if ((tag & i) !== 0) dst[dstByteOffset] = src[srcByteOffset++]; dstByteOffset++; } lastTag = tag; } } return dst.buffer; }