Spaces:

W404NET
/

Telegram-Chat-Bot

Configuration error

File size: 11,079 Bytes
/**
 * @author jdiaz5513
 */

import { PACK_SPAN_THRESHOLD } from "../constants";
import { MSG_PACK_NOT_WORD_ALIGNED } from "../errors";

/**
 * Tag byte values that the packing format interprets in a special way: `0x00` and `0xff`. Every other tag value is an
 * ordinary bitmask in which bit N set means byte N of the word is nonzero and is written after the tag.
 *
 * @enum {number}
 */

const enum PackedTag {
  /**
   * The tag is followed by a single byte which indicates a count of consecutive zero-valued words, minus 1. E.g. if the
   * tag 0x00 is followed by 0x05, the sequence unpacks to 6 words of zero.
   *
   * Or, put another way: the tag is first decoded as if it were not special. Since none of the bits are set, it is
   * followed by no bytes and expands to a word full of zeros. After that, the next byte is interpreted as a count of
   * additional words that are also all-zero.
   */

  ZERO = 0x00,

  /**
   * The tag is followed by the eight bytes of the word (as if it weren't special), but after those bytes is another
   * byte with value N. Following that byte are N unpacked words that should be copied directly.
   *
   * These unpacked words may contain zeroes; in this implementation the span is ended once PACK_SPAN_THRESHOLD zero
   * bytes have been seen in it (or once the count byte would overflow).
   *
   * The purpose of this rule is to minimize the impact of packing on data that doesn't contain any zeros - in
   * particular, long text blobs. Because of this rule, the worst-case space overhead of packing is 2 bytes per 2 KiB of
   * input (256 words = 2 KiB).
   */

  SPAN = 0xff,
}

/**
 * Count the bits that are set to 1 in an integer (its Hamming weight, a.k.a. population count). While sizing a packed
 * message this tells us how many literal bytes follow a given tag byte.
 *
 * The count is computed without branches by summing bits in progressively wider fields: pairs, then nibbles, then
 * bytes, and finally all four bytes at once via a multiplication.
 *
 * WARNING: The result is only meaningful for integers; do not pass fractional values.
 *
 * @param {number} x An integer.
 * @returns {number} The number of set bits in `x`.
 */

export function getHammingWeight(x: number): number {
  // Each 2-bit field now holds the number of set bits found in that field.
  const pairCounts = x - ((x >> 1) & 0x55555555);

  // Fold neighbouring pairs together so that each 4-bit field holds its own count.
  const nibbleCounts = (pairCounts & 0x33333333) + ((pairCounts >> 2) & 0x33333333);

  // Fold neighbouring nibbles together so that each byte holds its own count (at most 8).
  const byteCounts = (nibbleCounts + (nibbleCounts >> 4)) & 0x0f0f0f0f;

  // The multiplication accumulates all four byte counts into the most significant byte, which the shift extracts.
  return (byteCounts * 0x01010101) >> 24;
}

/** Label for parameters that carry one byte of a 64-bit word. It is a plain `number`; no range is enforced. */
export type byte = number;

/**
 * Build the packing tag for one 64-bit word, given as its eight individual bytes. Bit N of the tag (counting from the
 * least significant bit) is set exactly when the N-th byte of the word is not zero.
 *
 * @param {byte} a Byte 0 of the word (maps to tag bit 0).
 * @param {byte} b Byte 1 of the word.
 * @param {byte} c Byte 2 of the word.
 * @param {byte} d Byte 3 of the word.
 * @param {byte} e Byte 4 of the word.
 * @param {byte} f Byte 5 of the word.
 * @param {byte} g Byte 6 of the word.
 * @param {byte} h Byte 7 of the word (maps to tag bit 7).
 * @returns {number} The tag byte, in the range 0x00-0xff.
 */

export function getTagByte(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number {
  let tag = 0;

  if (a !== 0) tag |= 0x01;
  if (b !== 0) tag |= 0x02;
  if (c !== 0) tag |= 0x04;
  if (d !== 0) tag |= 0x08;
  if (e !== 0) tag |= 0x10;
  if (f !== 0) tag |= 0x20;
  if (g !== 0) tag |= 0x40;
  if (h !== 0) tag |= 0x80;

  return tag;
}

/**
 * Calculate how many bytes a packed Cap'n Proto message will occupy once unpacked, without actually unpacking it.
 *
 * The packed stream is walked tag by tag. Every tag accounts for one word and is followed by one literal byte per bit
 * set in the tag. The two special tags are additionally followed by a count byte: for `ZERO` it is a number of extra
 * all-zero words, for `SPAN` it is a number of words stored verbatim right after the count.
 *
 * @export
 * @param {ArrayBuffer} packed The packed message.
 * @returns {number} The length of the unpacked message in bytes.
 */

export function getUnpackedByteLength(packed: ArrayBuffer): number {
  const bytes = new Uint8Array(packed);
  const end = bytes.byteLength;

  let words = 0;
  let pos = 0;

  while (pos < end) {
    const tag = bytes[pos];

    // The tag itself always stands for exactly one word; skip over it and its literal bytes.
    words++;
    pos += 1 + getHammingWeight(tag);

    // Ordinary tags have nothing else attached to them.
    if (tag !== PackedTag.ZERO && tag !== PackedTag.SPAN) continue;

    // A special tag expects a trailing count byte; if the input ends here there is nothing more to add.
    if (pos >= end) break;

    const count = bytes[pos];

    words += count;

    // Step over the count byte and, for a span, over the verbatim words that follow it.
    pos += tag === PackedTag.SPAN ? 1 + count * 8 : 1;
  }

  return words * 8;
}

/**
 * Count how many of the eight bytes making up a 64-bit word are equal to zero.
 *
 * @param {byte} a Byte 0 of the word.
 * @param {byte} b Byte 1 of the word.
 * @param {byte} c Byte 2 of the word.
 * @param {byte} d Byte 3 of the word.
 * @param {byte} e Byte 4 of the word.
 * @param {byte} f Byte 5 of the word.
 * @param {byte} g Byte 6 of the word.
 * @param {byte} h Byte 7 of the word.
 * @returns {number} How many of the given bytes are zero (0 through 8).
 */

export function getZeroByteCount(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number {
  let zeroes = 0;

  if (a === 0) zeroes++;
  if (b === 0) zeroes++;
  if (c === 0) zeroes++;
  if (d === 0) zeroes++;
  if (e === 0) zeroes++;
  if (f === 0) zeroes++;
  if (g === 0) zeroes++;
  if (h === 0) zeroes++;

  return zeroes;
}

/**
 * Pack a section of a Cap'n Proto message into a compressed format. This will efficiently compress zero bytes (which
 * are common in idiomatic Cap'n Proto messages) into a compact form.
 *
 * For stream-framed messages this is intended to be called once for the frame header and once again for each segment
 * in the message.
 *
 * Each input word is written as a tag byte (see `getTagByte`) followed by its nonzero bytes. Two tags get special
 * treatment: after a `ZERO` tag a count of further all-zero words is written, and after a `SPAN` tag a count of
 * further words that are copied verbatim is written, followed by those words.
 *
 * The returned array buffer is trimmed to the exact size of the packed message with a single copy operation at the end.
 * This should be decent on CPU time but does require quite a lot of memory (a normal array is filled up with each
 * packed byte until the packing is complete).
 *
 * @export
 * @param {ArrayBuffer} unpacked The message to pack.
 * @param {number} [byteOffset] Starting byte offset to read bytes from, defaults to 0.
 * @param {number} [byteLength] Total number of bytes to read, defaults to the remainder of the buffer contents.
 * @returns {ArrayBuffer} A packed version of the message.
 * @throws {Error} With `MSG_PACK_NOT_WORD_ALIGNED` if the selected region is not a whole number of 8-byte words.
 */

export function pack(unpacked: ArrayBuffer, byteOffset = 0, byteLength?: number): ArrayBuffer {
  const src = new Uint8Array(unpacked, byteOffset, byteLength);

  // Validate the region that is actually going to be read rather than the whole backing buffer. A partial trailing
  // word would otherwise be read past the end of the view and packed as if its missing bytes were present.

  if (src.byteLength % 8 !== 0) throw new Error(MSG_PACK_NOT_WORD_ALIGNED);

  // TODO: Maybe we should do this with buffers? This costs more than 8x the final compressed size in temporary RAM.

  const dst: number[] = [];

  /** The previously written tag; starts at a value that is neither ZERO nor SPAN. */

  let lastTag = 0x77;

  /** Index in `dst` of the placeholder count byte belonging to the current SPAN. */

  let spanTagOffset = NaN;

  /** How many words have been absorbed by the current ZERO run or SPAN (not counting the tagged word itself). */

  let spanWordLength = 0;

  /**
   * Remaining zero-byte budget of the current SPAN. Once it reaches zero, PACK_SPAN_THRESHOLD zero bytes have passed
   * by and the span is ended.
   */

  let spanThreshold = PACK_SPAN_THRESHOLD;

  for (let srcByteOffset = 0; srcByteOffset < src.byteLength; srcByteOffset += 8) {
    /** Read in the entire word up front; the individual bytes are needed several times below. */

    const a = src[srcByteOffset];
    const b = src[srcByteOffset + 1];
    const c = src[srcByteOffset + 2];
    const d = src[srcByteOffset + 3];
    const e = src[srcByteOffset + 4];
    const f = src[srcByteOffset + 5];
    const g = src[srcByteOffset + 6];
    const h = src[srcByteOffset + 7];

    const tag = getTagByte(a, b, c, d, e, f, g, h);

    /** When true, the word was absorbed by the current run/span and the normal tagged write below is skipped. */

    let skipWriteWord = true;

    switch (lastTag) {
      case PackedTag.ZERO:
        // We're inside a run of all-zero words. End the run if this word has a nonzero byte or the count byte is full.

        if (tag !== PackedTag.ZERO || spanWordLength >= 0xff) {
          // Emit the count byte for the run, then write this word normally.

          dst.push(spanWordLength);
          spanWordLength = 0;

          skipWriteWord = false;
        } else {
          // Another zero word; it only costs an increment of the pending count.

          spanWordLength++;
        }

        break;

      case PackedTag.SPAN: {
        // We're inside a span of words that are copied verbatim.

        const zeroCount = getZeroByteCount(a, b, c, d, e, f, g, h);

        // Charge this word's zero bytes against the span's budget before deciding whether to continue.

        spanThreshold -= zeroCount;

        if (spanThreshold <= 0 || spanWordLength >= 0xff) {
          // Too many zero bytes or a full count byte: close the span by filling in its reserved count byte.

          dst[spanTagOffset] = spanWordLength;
          spanWordLength = 0;

          spanThreshold = PACK_SPAN_THRESHOLD;

          // This word is not part of the span, so it has to be written normally.

          skipWriteWord = false;
        } else {
          // Still within budget: copy the word as-is, zero bytes included.

          dst.push(a, b, c, d, e, f, g, h);

          spanWordLength++;
        }

        break;
      }
      default:
        // The previous tag was ordinary, so this word is written normally.

        skipWriteWord = false;

        break;
    }

    if (skipWriteWord) continue;

    // Normal write: the tag followed by only the nonzero bytes of the word, in order.

    dst.push(tag);
    lastTag = tag;

    if (a !== 0) dst.push(a);
    if (b !== 0) dst.push(b);
    if (c !== 0) dst.push(c);
    if (d !== 0) dst.push(d);
    if (e !== 0) dst.push(e);
    if (f !== 0) dst.push(f);
    if (g !== 0) dst.push(g);
    if (h !== 0) dst.push(h);

    // A SPAN tag needs a count byte after its word; reserve the slot now and fill it in when the span ends.

    if (tag === PackedTag.SPAN) {
      spanTagOffset = dst.length;

      dst.push(0);
    }
  }

  // Input exhausted. A run or span that is still open needs its count byte finalized.

  if (lastTag === PackedTag.ZERO) {
    dst.push(spanWordLength);
  } else if (lastTag === PackedTag.SPAN) {
    dst[spanTagOffset] = spanWordLength;
  }

  return new Uint8Array(dst).buffer;
}

/**
 * Expand a packed Cap'n Proto message into a freshly allocated ArrayBuffer.
 *
 * The exact output size is computed up front with `getUnpackedByteLength`, so the result is written straight into a
 * buffer of the right size with no intermediate storage.
 *
 * @export
 * @param {ArrayBuffer} packed An array buffer containing the packed message.
 * @returns {ArrayBuffer} The unpacked message.
 */

export function unpack(packed: ArrayBuffer): ArrayBuffer {
  const input = new Uint8Array(packed);
  const output = new Uint8Array(new ArrayBuffer(getUnpackedByteLength(packed)));
  const end = input.byteLength;

  let readAt = 0;
  let writeAt = 0;

  while (readAt < end) {
    const tag = input[readAt++];

    // Expand the tagged word. Only bytes flagged in the tag are present in the input; the rest stay zero because a new
    // buffer starts out zero-filled.

    for (let bit = 0; bit < 8; bit++) {
      if ((tag & (1 << bit)) !== 0) output[writeAt] = input[readAt++];

      writeAt++;
    }

    // Ordinary tags carry nothing beyond their own word.

    if (tag !== PackedTag.ZERO && tag !== PackedTag.SPAN) continue;

    // Special tags are followed by a count byte; if the input stops short of it there is nothing left to do.

    if (readAt >= end) break;

    const countedByteLength = input[readAt++] * 8;

    if (tag === PackedTag.SPAN) {
      // The counted words are stored verbatim, so copy them across in one go.

      output.set(input.subarray(readAt, readAt + countedByteLength), writeAt);

      readAt += countedByteLength;
    }

    // For a ZERO run the counted words are all zero, which the output already is; just move past them.

    writeAt += countedByteLength;
  }

  return output.buffer;
}