// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

import { Vector } from './vector.js';
import { BufferType, Type } from './enum.js';
import { DataType, strideForType } from './type.js';
import { popcnt_bit_range, truncateBitmap } from './util/bit.js';

// When slicing, we do not know the null count of the sliced range without
// doing some computation. To avoid doing this eagerly, we set the null count
// to -1 (any negative number will do). When Vector.nullCount is called the
// first time, the null count will be computed. See ARROW-33
/** @ignore */ export type kUnknownNullCount = -1;
/** @ignore */ export const kUnknownNullCount = -1;

/** @ignore */ export type NullBuffer = Uint8Array | null | undefined;
/** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike<number> | Iterable<number> | undefined;
/** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike<number> | Iterable<number> | undefined;
/** @ignore */ export type DataBuffer<T extends DataType> = T['TArray'] | ArrayLike<number> | Iterable<number> | undefined;

/** @ignore */
export interface Buffers<T extends DataType> {
    [BufferType.OFFSET]: Int32Array;
    [BufferType.DATA]: T['TArray'];
    [BufferType.VALIDITY]: Uint8Array;
    [BufferType.TYPE]: T['TArray'];
}

/** @ignore */
export interface Data<T extends DataType = DataType> {
    readonly TType: T['TType'];
    readonly TArray: T['TArray'];
    readonly TValue: T['TValue'];
}

/**
 * Data structure underlying {@link Vector}s. Use the convenience method {@link makeData}.
 *
 * A `Data` holds up to four buffers (value offsets, values, validity bitmap,
 * type ids), a logical `offset`/`length` window into them, a lazily computed
 * null count, and any child `Data` instances.
 */
export class Data<T extends DataType = DataType> {

    declare public readonly type: T;
    declare public readonly length: number;
    declare public readonly offset: number;
    declare public readonly stride: number;
    declare public readonly nullable: boolean;
    declare public readonly children: Data[];

    /**
     * The dictionary for this Vector, if any. Only used for Dictionary type.
     */
    declare public dictionary?: Vector;

    declare public readonly values: Buffers<T>[BufferType.DATA];
    declare public readonly typeIds: Buffers<T>[BufferType.TYPE];
    declare public readonly nullBitmap: Buffers<T>[BufferType.VALIDITY];
    declare public readonly valueOffsets: Buffers<T>[BufferType.OFFSET];

    /** The type id of this Data's `type`. */
    public get typeId(): T['TType'] { return this.type.typeId; }

    /** The array constructor reported by this Data's `type`. */
    public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; }

    /**
     * The four buffers in the order `[valueOffsets, values, nullBitmap, typeIds]`.
     * A fresh array is returned on every access, so callers may mutate the
     * returned array without affecting this instance.
     */
    public get buffers() {
        return [this.valueOffsets, this.values, this.nullBitmap, this.typeIds] as Buffers<T>;
    }

    /** Sum of the `byteLength` of every present buffer, plus that of all children. */
    public get byteLength(): number {
        let byteLength = 0;
        const { valueOffsets, values, nullBitmap, typeIds } = this;
        valueOffsets && (byteLength += valueOffsets.byteLength);
        values && (byteLength += values.byteLength);
        nullBitmap && (byteLength += nullBitmap.byteLength);
        typeIds && (byteLength += typeIds.byteLength);
        return this.children.reduce((byteLength, child) => byteLength + child.byteLength, byteLength);
    }

    protected _nullCount: number | kUnknownNullCount;

    /**
     * The number of null slots in `[offset, offset + length)`.
     *
     * If the stored count is unknown (negative) and a validity bitmap exists,
     * the count is computed as `length - popcount(bitmap)` and cached.
     * If the count is unknown and there is no bitmap, the stored (negative)
     * value is returned unchanged.
     */
    public get nullCount() {
        let nullCount = this._nullCount;
        let nullBitmap: Uint8Array | undefined;
        if (nullCount <= kUnknownNullCount && (nullBitmap = this.nullBitmap)) {
            this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length);
        }
        return nullCount;
    }

    /**
     * @param type The logical type of the data.
     * @param offset Logical start offset; clamped to `>= 0` and floored.
     * @param length Logical length; clamped to `>= 0` and floored.
     * @param nullCount Known null count, or a negative number for "unknown"; clamped to `>= -1`.
     * @param buffers Either another `Data` to share buffers (and stride) with, or an
     *                array-like indexed `[valueOffsets, values, nullBitmap, typeIds]`.
     * @param children Child data instances.
     * @param dictionary The dictionary Vector, if any.
     */
    constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial<Buffers<T>> | Data<T>, children: Data[] = [], dictionary?: Vector) {
        this.type = type;
        this.children = children;
        this.dictionary = dictionary;
        this.offset = Math.floor(Math.max(offset || 0, 0));
        this.length = Math.floor(Math.max(length || 0, 0));
        this._nullCount = Math.floor(Math.max(nullCount || 0, -1));
        let buffer: Buffers<T>[keyof Buffers<T>];
        if (buffers instanceof Data) {
            this.stride = buffers.stride;
            this.values = buffers.values;
            this.typeIds = buffers.typeIds;
            this.nullBitmap = buffers.nullBitmap;
            this.valueOffsets = buffers.valueOffsets;
        } else {
            this.stride = strideForType(type);
            if (buffers) {
                // Only assign the buffers that are actually present (truthy).
                (buffer = (buffers as Buffers<T>)[0]) && (this.valueOffsets = buffer);
                (buffer = (buffers as Buffers<T>)[1]) && (this.values = buffer);
                (buffer = (buffers as Buffers<T>)[2]) && (this.nullBitmap = buffer);
                (buffer = (buffers as Buffers<T>)[3]) && (this.typeIds = buffer);
            }
        }
        // Nullable only when the null count is not known to be zero AND a
        // non-empty validity bitmap is present.
        this.nullable = this._nullCount !== 0 && this.nullBitmap && this.nullBitmap.byteLength > 0;
    }

    /**
     * Whether the slot at `index` (relative to `offset`) is valid, i.e. not null.
     * A set bit in the validity bitmap means "valid". Always returns `true`
     * when this Data is not nullable or has no nulls.
     */
    public getValid(index: number) {
        if (this.nullable && this.nullCount > 0) {
            const pos = this.offset + index;
            const val = this.nullBitmap[pos >> 3];
            return (val & (1 << (pos % 8))) !== 0;
        }
        return true;
    }

    /**
     * Mark the slot at `index` (relative to `offset`) as valid (`true`) or null
     * (`false`), keeping the cached null count in sync with the bitmap.
     *
     * @returns `value`, unchanged.
     */
    public setValid(index: number, value: boolean) {
        // Don't interact w/ nullBitmap if not nullable
        if (!this.nullable) { return value; }
        // If no null bitmap, initialize one on the fly
        // NOTE(review): this bounds check uses `index` rather than `offset + index`,
        // and the backfilled bitmap is built relative to offset 0 while `this.offset`
        // is left unchanged -- confirm behaviour for sliced (offset > 0) data.
        if (!this.nullBitmap || this.nullBitmap.byteLength <= (index >> 3)) {
            const { nullBitmap } = this._changeLengthAndBackfillNullBitmap(this.length);
            Object.assign(this, { nullBitmap, _nullCount: 0 });
        }
        const { nullBitmap, offset } = this;
        const pos = (offset + index) >> 3;
        const bit = (offset + index) % 8;
        const val = (nullBitmap[pos] >> bit) & 1;
        // Only touch the bitmap/count when the bit actually changes.
        if (value ? val === 0 : val === 1) {
            // Resolve the (possibly lazily computed) null count BEFORE flipping the
            // bit; otherwise an "unknown" count would be recomputed from the already
            // modified bitmap and then adjusted a second time.
            const nullCountBefore = this.nullCount;
            if (value) {
                // null -> valid: set the bit, one fewer null.
                nullBitmap[pos] |= (1 << bit);
                this._nullCount = nullCountBefore - 1;
            } else {
                // valid -> null: clear the bit, one more null.
                nullBitmap[pos] &= ~(1 << bit);
                this._nullCount = nullCountBefore + 1;
            }
        }
        return value;
    }

    /**
     * Create a new `Data`, defaulting every argument to this instance's value.
     * When `buffers` is omitted, this instance itself is passed so the new
     * `Data` shares its buffers and stride. The dictionary is always carried over.
     */
    public clone<R extends DataType = T>(type: R = this.type as any, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers<R> = <any>this, children: Data[] = this.children) {
        return new Data(type, offset, length, nullCount, buffers, children, this.dictionary);
    }

    /**
     * Return a new `Data` covering `length` slots starting at `offset`
     * (relative to this Data's own offset).
     */
    public slice(offset: number, length: number): Data<T> {
        const { stride, typeId, children } = this;
        // +true === 1, +false === 0, so this means
        // we keep nullCount at 0 if it's already 0,
        // otherwise set to the invalidated flag -1
        const nullCount = +(this._nullCount === 0) - 1;
        const childStride = typeId === 16 /* FixedSizeList */ ? stride : 1;
        const buffers = this._sliceBuffers(offset, length, stride, typeId);
        return this.clone<T>(this.type, this.offset + offset, length, nullCount, buffers,
            // Don't slice children if we have value offsets (the variable-width types)
            (children.length === 0 || this.valueOffsets) ? children : this._sliceChildren(children, childStride * offset, childStride * length));
    }

    /**
     * Return a clone of length `newLength` with offset 0 and a freshly allocated
     * validity bitmap: existing slots keep their validity, added slots are null.
     * For the Null type, only the length changes.
     */
    public _changeLengthAndBackfillNullBitmap(newLength: number): Data<T> {
        if (this.typeId === Type.Null) {
            return this.clone(this.type, 0, newLength, 0);
        }
        const { length, nullCount } = this;
        // start initialized with 0s (nulls), then fill from 0 to length with 1s (not null)
        const bitmap = new Uint8Array(((newLength + 63) & ~63) >> 3).fill(255, 0, length >> 3);
        // set all the bits in the last byte (up to bit `length - length % 8`) to 1 (not null)
        bitmap[length >> 3] = (1 << (length - (length & ~7))) - 1;
        // if we have a nullBitmap, truncate + slice and set it over the pre-filled 1s
        if (nullCount > 0) {
            bitmap.set(truncateBitmap(this.offset, length, this.nullBitmap), 0);
        }
        const buffers = this.buffers;
        buffers[BufferType.VALIDITY] = bitmap;
        return this.clone(this.type, 0, newLength, nullCount + (newLength - length), buffers);
    }

    /**
     * Build the buffer set for a slice. Operates on the fresh array returned by
     * the `buffers` getter, so this instance's own buffers are not reassigned.
     */
    protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers<T> {
        let arr: any;
        const { buffers } = this;
        // If typeIds exist, slice the typeIds buffer
        (arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = arr.subarray(offset, offset + length));
        // If offsets exist, only slice the offsets buffer
        (arr = buffers[BufferType.OFFSET]) && (buffers[BufferType.OFFSET] = arr.subarray(offset, offset + length + 1)) ||
            // Otherwise if no offsets, slice the data buffer. Don't slice the data vector for Booleans, since the offset goes by bits not bytes
            (arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = typeId === 6 ? arr : arr.subarray(stride * offset, stride * (offset + length)));
        return buffers;
    }

    /** Slice every child with the same `offset` and `length`. */
    protected _sliceChildren(children: Data[], offset: number, length: number): Data[] {
        return children.map((child) => child.slice(offset, length));
    }
}

// Shared, frozen empty default for `children` on the prototype.
(Data.prototype as any).children = Object.freeze([]);

import {
    Dictionary,
    Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
    Float,
    Int,
    Date_,
    Interval,
    Time,
    Timestamp,
    Union, DenseUnion, SparseUnion,
} from './type.js';

import { Visitor } from './visitor.js';
import { toArrayBufferView, toInt32Array, toUint8Array } from './util/buffer.js';

/**
 * Visitor that builds a {@link Data} from a props object, dispatching on
 * `props.type`. Each `visit*` method normalizes the supplied buffers, derives
 * defaults for `length` (from the relevant buffer or children) and `nullCount`
 * (-1 "unknown" when a `nullBitmap` was supplied, otherwise 0), and constructs
 * the `Data`.
 */
class MakeDataVisitor extends Visitor {
    public visit<T extends DataType>(props: any): Data<T> {
        return this.getVisitFn(props['type']).call(this, props);
    }
    public visitNull<T extends Null>(props: NullDataProps<T>) {
        const {
            ['type']: type,
            ['offset']: offset = 0,
            ['length']: length = 0,
        } = props;
        return new Data(type, offset, length, 0);
    }
    public visitBool<T extends Bool>(props: BoolDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        // NOTE(review): the default length is `data.length >> 3`; if `data` is a
        // bit-packed byte buffer one would expect `<< 3` -- confirm before relying
        // on the default. Behaviour intentionally left unchanged here.
        const { ['length']: length = data.length >> 3, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitInt<T extends Int>(props: IntDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitFloat<T extends Float>(props: FloatDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitUtf8<T extends Utf8>(props: Utf8DataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const data = toUint8Array(props['data']);
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const valueOffsets = toInt32Array(props['valueOffsets']);
        // N values are delimited by N + 1 offsets.
        const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props;
        return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]);
    }
    public visitBinary<T extends Binary>(props: BinaryDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const data = toUint8Array(props['data']);
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const valueOffsets = toInt32Array(props['valueOffsets']);
        const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props;
        return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]);
    }
    public visitFixedSizeBinary<T extends FixedSizeBinary>(props: FixedSizeBinaryDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitDate<T extends Date_>(props: Date_DataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitTimestamp<T extends Timestamp>(props: TimestampDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitTime<T extends Time>(props: TimeDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitDecimal<T extends Decimal>(props: DecimalDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitList<T extends List>(props: ListDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0, ['child']: child } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const valueOffsets = toInt32Array(props['valueOffsets']);
        const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props;
        return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap], [child]);
    }
    public visitStruct<T extends Struct>(props: StructDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0, ['children']: children = [] } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const {
            // Default length is the longest child's length.
            length = children.reduce((len, { length }) => Math.max(len, length), 0),
            nullCount = props['nullBitmap'] ? -1 : 0
        } = props;
        return new Data(type, offset, length, nullCount, [undefined, undefined, nullBitmap], children);
    }
    public visitUnion<T extends Union>(props: UnionDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0, ['children']: children = [] } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const typeIds = toArrayBufferView(type.ArrayType, props['typeIds']);
        const { ['length']: length = typeIds.length, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        // Sparse unions carry no value offsets; all other unions do.
        if (DataType.isSparseUnion(type)) {
            return new Data(type, offset, length, nullCount, [undefined, undefined, nullBitmap, typeIds], children);
        }
        const valueOffsets = toInt32Array(props['valueOffsets']);
        return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap, typeIds], children);
    }
    public visitDictionary<T extends Dictionary>(props: DictionaryDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.indices.ArrayType, props['data']);
        // Without an explicit dictionary, default to a Vector wrapping a Data
        // built from only the dictionary's value type.
        const { ['dictionary']: dictionary = new Vector([new MakeDataVisitor().visit({ type: type.dictionary })]) } = props;
        const { ['length']: length = data.length, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap], [], dictionary);
    }
    public visitInterval<T extends Interval>(props: IntervalDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0 } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const data = toArrayBufferView(type.ArrayType, props['data']);
        const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
    }
    public visitFixedSizeList<T extends FixedSizeList>(props: FixedSizeListDataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0, ['child']: child = new MakeDataVisitor().visit({ type: type.valueType }) } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const { ['length']: length = child.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props;
        return new Data(type, offset, length, nullCount, [undefined, undefined, nullBitmap], [child]);
    }
    public visitMap<T extends Map_>(props: Map_DataProps<T>) {
        const { ['type']: type, ['offset']: offset = 0, ['child']: child = new MakeDataVisitor().visit({ type: type.childType }) } = props;
        const nullBitmap = toUint8Array(props['nullBitmap']);
        const valueOffsets = toInt32Array(props['valueOffsets']);
        const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
        return new Data(type, offset, length, nullCount, [valueOffsets, undefined, nullBitmap], [child]);
    }
}

/** @ignore */
interface DataProps_<T extends DataType> {
    type: T;
    offset?: number;
    length?: number;
    nullCount?: number;
    nullBitmap?: NullBuffer;
}

interface NullDataProps<T extends Null> { type: T; offset?: number; length?: number }
interface IntDataProps<T extends Int> extends DataProps_<T> { data?: DataBuffer<T> }
interface DictionaryDataProps<T extends Dictionary> extends DataProps_<T> { data?: DataBuffer<T>; dictionary?: Vector<T['dictionary']> }
interface FloatDataProps<T extends Float> extends DataProps_<T> { data?: DataBuffer<T> }
interface BoolDataProps<T extends Bool> extends DataProps_<T> { data?: DataBuffer<T> }
interface DecimalDataProps<T extends Decimal> extends DataProps_<T> { data?: DataBuffer<T> }
interface Date_DataProps<T extends Date_> extends DataProps_<T> { data?: DataBuffer<T> }
interface TimeDataProps<T extends Time> extends DataProps_<T> { data?: DataBuffer<T> }
interface TimestampDataProps<T extends Timestamp> extends DataProps_<T> { data?: DataBuffer<T> }
interface IntervalDataProps<T extends Interval> extends DataProps_<T> { data?: DataBuffer<T> }
interface FixedSizeBinaryDataProps<T extends FixedSizeBinary> extends DataProps_<T> { data?: DataBuffer<T> }
interface BinaryDataProps<T extends Binary> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer<T> }
interface Utf8DataProps<T extends Utf8> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer<T> }
interface ListDataProps<T extends List> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; child: Data<T['valueType']> }
interface FixedSizeListDataProps<T extends FixedSizeList> extends DataProps_<T> { child: Data<T['valueType']> }
interface StructDataProps<T extends Struct> extends DataProps_<T> { children: Data[] }
interface Map_DataProps<T extends Map_> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; child: Data }
interface SparseUnionDataProps<T extends SparseUnion> extends DataProps_<T> { typeIds: TypeIdsBuffer; children: Data[] }
interface DenseUnionDataProps<T extends DenseUnion> extends DataProps_<T> { typeIds: TypeIdsBuffer; children: Data[]; valueOffsets: ValueOffsetsBuffer }
interface UnionDataProps<T extends Union> extends DataProps_<T> { typeIds: TypeIdsBuffer; children: Data[]; valueOffsets?: ValueOffsetsBuffer }

/** Maps a data type `T` to the props shape {@link makeData} accepts for it. */
export type DataProps<T extends DataType> = (
    T extends Null /*            */ ? NullDataProps<T> :
    T extends Int /*             */ ? IntDataProps<T> :
    T extends Dictionary /*      */ ? DictionaryDataProps<T> :
    T extends Float /*           */ ? FloatDataProps<T> :
    T extends Bool /*            */ ? BoolDataProps<T> :
    T extends Decimal /*         */ ? DecimalDataProps<T> :
    T extends Date_ /*           */ ? Date_DataProps<T> :
    T extends Time /*            */ ? TimeDataProps<T> :
    T extends Timestamp /*       */ ? TimestampDataProps<T> :
    T extends Interval /*        */ ? IntervalDataProps<T> :
    T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps<T> :
    T extends Binary /*          */ ? BinaryDataProps<T> :
    T extends Utf8 /*            */ ? Utf8DataProps<T> :
    T extends List /*            */ ? ListDataProps<T> :
    T extends FixedSizeList /*   */ ? FixedSizeListDataProps<T> :
    T extends Struct /*          */ ? StructDataProps<T> :
    T extends Map_ /*            */ ? Map_DataProps<T> :
    T extends SparseUnion /*     */ ? SparseUnionDataProps<T> :
    T extends DenseUnion /*      */ ? DenseUnionDataProps<T> :
    T extends Union /*           */ ? UnionDataProps<T> :
 /*                              */ DataProps_<T>
);

/**
 * Create a {@link Data} from a props object. The concrete construction logic
 * is selected by `props.type` via `MakeDataVisitor`.
 */
export function makeData<T extends Null>(props: NullDataProps<T>): Data<T>;
export function makeData<T extends Int>(props: IntDataProps<T>): Data<T>;
export function makeData<T extends Dictionary>(props: DictionaryDataProps<T>): Data<T>;
export function makeData<T extends Float>(props: FloatDataProps<T>): Data<T>;
export function makeData<T extends Bool>(props: BoolDataProps<T>): Data<T>;
export function makeData<T extends Decimal>(props: DecimalDataProps<T>): Data<T>;
export function makeData<T extends Date_>(props: Date_DataProps<T>): Data<T>;
export function makeData<T extends Time>(props: TimeDataProps<T>): Data<T>;
export function makeData<T extends Timestamp>(props: TimestampDataProps<T>): Data<T>;
export function makeData<T extends Interval>(props: IntervalDataProps<T>): Data<T>;
export function makeData<T extends FixedSizeBinary>(props: FixedSizeBinaryDataProps<T>): Data<T>;
export function makeData<T extends Binary>(props: BinaryDataProps<T>): Data<T>;
export function makeData<T extends Utf8>(props: Utf8DataProps<T>): Data<T>;
export function makeData<T extends List>(props: ListDataProps<T>): Data<T>;
export function makeData<T extends FixedSizeList>(props: FixedSizeListDataProps<T>): Data<T>;
export function makeData<T extends Struct>(props: StructDataProps<T>): Data<T>;
export function makeData<T extends Map_>(props: Map_DataProps<T>): Data<T>;
export function makeData<T extends SparseUnion>(props: SparseUnionDataProps<T>): Data<T>;
export function makeData<T extends DenseUnion>(props: DenseUnionDataProps<T>): Data<T>;
export function makeData<T extends Union>(props: UnionDataProps<T>): Data<T>;
export function makeData<T extends DataType>(props: DataProps_<T>): Data<T>;
export function makeData(props: any) {
    return new MakeDataVisitor().visit(props);
}