// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { Type } from './enum.js';
import { Data, makeData } from './data.js';
import { vectorFromArray } from './factories.js';
import { makeVector, Vector } from './vector.js';
import { Field, Schema } from './schema.js';
import { DataType, Null, Struct, TypeMap } from './type.js';
import { compareSchemas } from './visitor/typecomparator.js';
import { distributeVectorsIntoRecordBatches } from './util/recordbatch.js';

import {
    isChunkedValid,
    computeChunkOffsets,
    computeChunkNullCounts,
    wrapChunkedCall1,
    wrapChunkedCall2,
    wrapChunkedIndexOf,
    sliceChunks,
} from './util/chunk.js';

import { instance as getVisitor } from './visitor/get.js';
import { instance as setVisitor } from './visitor/set.js';
import { instance as indexOfVisitor } from './visitor/indexof.js';
import { instance as iteratorVisitor } from './visitor/iterator.js';
import { instance as byteLengthVisitor } from './visitor/bytelength.js';

import { DataProps } from './data.js';
import { clampRange } from './util/vector.js';
import { ArrayDataType, BigIntArray, TypedArray, TypedArrayDataType } from './interfaces.js';
import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from './recordbatch.js';

/** @ignore */
export interface Table<T extends TypeMap = any> {
    ///
    // Virtual properties for the TypeScript compiler.
    // These do not exist at runtime.
    ///
    readonly TType: Struct<T>;
    readonly TArray: Struct<T>['TArray'];
    readonly TValue: Struct<T>['TValue'];

    /**
     * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/isConcatSpreadable
     */
    [Symbol.isConcatSpreadable]: true;
}

/**
 * Tables are collections of {@link Vector}s and have a {@link Schema}. Use the convenience methods {@link makeTable}
 * or {@link tableFromArrays} to create a table in JavaScript. To create a table from the IPC format, use
 * {@link tableFromIPC}.
 */
export class Table<T extends TypeMap = any> {

    constructor();
    constructor(batches: Iterable<RecordBatch<T>>);
    constructor(...batches: readonly RecordBatch<T>[]);
    constructor(...columns: { [P in keyof T]: Vector<T[P]> }[]);
    constructor(...columns: { [P in keyof T]: Data<T[P]> | DataProps<T[P]> }[]);
    constructor(schema: Schema<T>, data?: RecordBatch<T> | RecordBatch<T>[]);
    constructor(schema: Schema<T>, data?: RecordBatch<T> | RecordBatch<T>[], offsets?: Uint32Array);
    /**
     * Shared implementation behind the overloads above.
     *
     * An optional leading {@link Schema} and an optional trailing `Uint32Array` of offsets are
     * peeled off the argument list; every remaining argument is flattened into RecordBatches.
     *
     * @throws {TypeError} If a flattened value is not a RecordBatch, or if a batch's schema is
     * not equivalent to the Table schema according to `compareSchemas`.
     */
    constructor(...args: any[]) {

        // No arguments: an empty Table with an empty schema and a single zero offset.
        if (args.length === 0) {
            this.batches = [];
            this.schema = new Schema([]);
            this._offsets = [0];
            return this;
        }

        let schema: Schema<T> | undefined;
        let offsets: Uint32Array | number[] | undefined;

        if (args[0] instanceof Schema) {
            schema = args.shift() as Schema<T>;
        }

        if (args[args.length - 1] instanceof Uint32Array) {
            offsets = args.pop();
        }

        // Recursively flattens one constructor argument into a list of RecordBatches.
        // Falsy values and Data whose type is not a Struct contribute no batches.
        const unwrap = (x: any): RecordBatch<T>[] => {
            if (x) {
                if (x instanceof RecordBatch) {
                    return [x];
                } else if (x instanceof Table) {
                    return x.batches;
                } else if (x instanceof Data) {
                    if (x.type instanceof Struct) {
                        return [new RecordBatch(new Schema(x.type.children), x)];
                    }
                } else if (Array.isArray(x)) {
                    return x.flatMap(v => unwrap(v));
                } else if (typeof x[Symbol.iterator] === 'function') {
                    return [...x].flatMap(v => unwrap(v));
                } else if (typeof x === 'object') {
                    // A plain object is read as a map of column name -> column value.
                    const keys = Object.keys(x) as (keyof T)[];
                    const vecs = keys.map((k) => new Vector([x[k]]));
                    const columnSchema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type)));
                    const [, batches] = distributeVectorsIntoRecordBatches(columnSchema, vecs);
                    return batches.length === 0 ? [new RecordBatch(x)] : batches;
                }
            }
            return [];
        };

        const batches = args.flatMap(v => unwrap(v));

        // Prefer the explicit schema, then the first batch's schema, then an empty schema.
        schema = schema ?? batches[0]?.schema ?? new Schema([]);

        if (!(schema instanceof Schema)) {
            throw new TypeError('Table constructor expects a [Schema, RecordBatch[]] pair.');
        }

        for (const batch of batches) {
            if (!(batch instanceof RecordBatch)) {
                throw new TypeError('Table constructor expects a [Schema, RecordBatch[]] pair.');
            }
            if (!compareSchemas(schema, batch.schema)) {
                throw new TypeError('Table and inner RecordBatch schemas must be equivalent.');
            }
        }

        this.schema = schema;
        this.batches = batches;
        this._offsets = offsets ?? computeChunkOffsets(this.data);
    }

    declare protected _offsets: Uint32Array | number[];
    declare protected _nullCount: number;

    declare public readonly schema: Schema<T>;

    /**
     * The contiguous {@link RecordBatch `RecordBatch`} chunks of the Table rows.
     */
    declare public readonly batches: RecordBatch<T>[];

    /**
     * The `data` of each {@link RecordBatch `RecordBatch`} in this Table, in batch order.
     */
    public get data() { return this.batches.map(({ data }) => data); }

    /**
     * The number of columns in this Table.
     */
    public get numCols() { return this.schema.fields.length; }

    /**
     * The number of rows in this Table.
     */
    public get numRows() {
        return this.data.reduce((numRows, data) => numRows + data.length, 0);
    }

    /**
     * The number of null rows in this Table.
     */
    public get nullCount() {
        // -1 is the "not yet computed" sentinel installed on the prototype below.
        if (this._nullCount === -1) {
            this._nullCount = computeChunkNullCounts(this.data);
        }
        return this._nullCount;
    }

    // NOTE: the bodies of `isValid`, `get`, `set`, `indexOf` and `getByteLength` below are
    // placeholders. The static initializer at the bottom of this class overwrites each of
    // them on the prototype.

    /**
     * Check whether an element is null.
     *
     * @param index The index at which to read the validity bitmap.
     */
    // @ts-ignore
    public isValid(index: number): boolean { return false; }

    /**
     * Get an element value by position.
     *
     * @param index The index of the element to read.
     */
    // @ts-ignore
    public get(index: number): Struct<T>['TValue'] | null { return null; }

    /**
     * Set an element value by position.
     *
     * @param index The index of the element to write.
     * @param value The value to set.
     */
    // @ts-ignore
    public set(index: number, value: Struct<T>['TValue'] | null): void { return; }

    /**
     * Retrieve the index of the first occurrence of a row in this Table.
     *
     * @param element The row to locate in the Table.
     * @param offset The index at which to begin the search. If offset is omitted, the search starts at index 0.
     */
    // @ts-ignore
    public indexOf(element: Struct<T>['TValue'], offset?: number): number { return -1; }

    /**
     * Get the size in bytes of an element by index.
     *
     * @param index The index at which to get the byteLength.
     */
    // @ts-ignore
    public getByteLength(index: number): number { return 0; }

    /**
     * Iterator for rows in this Table.
     */
    public [Symbol.iterator]() {
        if (this.batches.length > 0) {
            return iteratorVisitor.visit(new Vector(this.data)) as IterableIterator<Struct<T>['TValue']>;
        }
        // With no batches, hand back an iterator over an empty Array.
        return (new Array(0))[Symbol.iterator]();
    }

    /**
     * Return a JavaScript Array of the Table rows.
     *
     * @returns An Array of Table rows.
     */
    public toArray() {
        return [...this];
    }

    /**
     * Returns a string representation of the Table rows.
     *
     * @returns A string representation of the Table rows.
     */
    public toString() {
        return `[\n ${this.toArray().join(',\n ')}\n]`;
    }

    /**
     * Combines two or more Tables of the same schema.
     *
     * @param others Additional Tables to add to the end of this Table.
     * @returns A new Table built from this Table's schema and the data of all the Tables.
     */
    public concat(...others: Table<T>[]) {
        const schema = this.schema;
        const data = this.data.concat(others.flatMap(({ data }) => data));
        return new Table(schema, data.map((data) => new RecordBatch(schema, data)));
    }

    /**
     * Return a zero-copy sub-section of this Table.
     *
     * @param begin The beginning of the specified portion of the Table.
     * @param end The end of the specified portion of the Table. This is exclusive of the element at the index 'end'.
     */
    public slice(begin?: number, end?: number): Table<T> {
        const schema = this.schema;
        [begin, end] = clampRange({ length: this.numRows }, begin, end);
        const data = sliceChunks(this.data, this._offsets, begin, end);
        return new Table(schema, data.map((chunk) => new RecordBatch(schema, chunk)));
    }

    /**
     * Returns a child Vector by name, or null if this Table has no child with the given name.
     *
     * @param name The name of the child to retrieve.
     */
    public getChild<P extends keyof T>(name: P) {
        return this.getChildAt<T[P]>(this.schema.fields.findIndex((f) => f.name === name));
    }

    /**
     * Returns a child Vector by index, or null if this Table has no child at the supplied index.
     *
     * @param index The index of the child to retrieve.
     */
    public getChildAt<R extends T[keyof T] = any>(index: number): Vector<R> | null {
        if (index > -1 && index < this.schema.fields.length) {
            const data = this.data.map((data) => data.children[index] as Data<R>);
            if (data.length === 0) {
                // No batches: synthesize a single chunk of the field's type, resized to numRows.
                const { type } = this.schema.fields[index] as Field<R>;
                const empty = makeData<R>({ type, length: 0, nullCount: 0 });
                data.push(empty._changeLengthAndBackfillNullBitmap(this.numRows));
            }
            return new Vector(data);
        }
        return null;
    }

    /**
     * Sets a child Vector by name.
     *
     * @param name The name of the child to overwrite.
     * @param child The Vector to store under that name.
     * @returns A new Table with the supplied child for the specified name.
     */
    public setChild<P extends keyof T, R extends DataType>(name: P, child: Vector<R>) {
        return this.setChildAt(this.schema.fields?.findIndex((f) => f.name === name), child) as Table<T & { [K in P]: R }>;
    }

    /**
     * Sets a child Vector by index.
     *
     * If `index` is out of range, the returned Table has this Table's schema and batches.
     * If `child` is null or omitted, the column is replaced by a {@link Null} Vector of
     * `numRows` elements.
     *
     * @param index The index of the child to overwrite.
     * @param child The replacement Vector, or null.
     * @returns A new Table with the supplied child at the specified index.
     */
    public setChildAt(index: number, child?: null): Table;
    public setChildAt<R extends DataType = any>(index: number, child: Vector<R>): Table;
    public setChildAt(index: number, child: any) {
        let schema: Schema = this.schema;
        let batches: RecordBatch[] = [...this.batches];
        if (index > -1 && index < this.numCols) {
            if (!child) {
                child = new Vector([makeData({ type: new Null, length: this.numRows })]);
            }
            const fields = schema.fields.slice() as Field<any>[];
            const children = schema.fields.map((_, i) => this.getChildAt(i)!);
            fields[index] = fields[index].clone({ type: child.type });
            children[index] = child;
            // Redistribute against a schema built from the *updated* field list. Passing the old
            // schema here would drop the cloned field, leaving the result's schema describing the
            // previous column type.
            // NOTE(review): assumes Schema accepts (fields, metadata) and exposes `metadata` - confirm.
            [schema, batches] = distributeVectorsIntoRecordBatches(new Schema(fields, schema.metadata), children);
        }
        return new Table(schema, batches);
    }

    /**
     * Construct a new Table containing only specified columns.
     *
     * Names that do not match any field are dropped.
     *
     * @param columnNames Names of columns to keep.
     * @returns A new Table of columns matching the specified names.
     */
    public select<K extends keyof T = any>(columnNames: K[]) {
        const nameToIndex = this.schema.fields.reduce((m, f, i) => m.set(f.name as K, i), new Map<K, number>());
        // An unknown name maps to `undefined`, which fails the `> -1` comparison and is filtered out.
        return this.selectAt(columnNames.map((columnName) => nameToIndex.get(columnName)!).filter((x) => x > -1));
    }

    /**
     * Construct a new Table containing only columns at the specified indices.
     *
     * @param columnIndices Indices of columns to keep.
     * @returns A new Table of columns at the specified indices.
     */
    public selectAt<K extends T[keyof T] = any>(columnIndices: number[]) {
        const schema = this.schema.selectAt<K>(columnIndices);
        const data = this.batches.map((batch) => batch.selectAt(columnIndices));
        return new Table<{ [key: string]: K }>(schema, data);
    }

    /**
     * Combine the columns of this Table with the columns of another Table.
     *
     * A column of `other` whose field name matches a field of this Table takes that field's
     * position; the remaining columns of `other` are appended after this Table's columns.
     *
     * @param other The Table whose columns are merged into this one.
     * @returns A new Table containing the merged columns.
     */
    public assign<R extends TypeMap = any>(other: Table<R>) {
        const fields = this.schema.fields;
        // indices:  positions in `other` of columns with no same-named field here (appended).
        // oldToNew: for a position here, the position in `other` of the same-named column.
        const [indices, oldToNew] = other.schema.fields.reduce((memo, f2, newIdx) => {
            const [indices, oldToNew] = memo;
            const i = fields.findIndex((f) => f.name === f2.name);
            if (i !== -1) {
                oldToNew[i] = newIdx;
            } else {
                indices.push(newIdx);
            }
            return memo;
        }, [[], []] as number[][]);

        const schema = this.schema.assign(other.schema);
        const columns = [
            ...fields.map((_, i) => [i, oldToNew[i]]).map(([i, j]) =>
                (j === undefined ? this.getChildAt(i) : other.getChildAt(j))!),
            ...indices.map((i) => other.getChildAt(i)!)
        ].filter(Boolean) as Vector<(T & R)[keyof T | keyof R]>[];

        return new Table<T & R>(...distributeVectorsIntoRecordBatches<any>(schema, columns));
    }

    // Initialize this static property via an IIFE so bundlers don't tree-shake
    // out this logic, but also so we're still compliant with `"sideEffects": false`
    protected static [Symbol.toStringTag] = ((proto: Table) => {
        (proto as any).schema = null;
        (proto as any).batches = [];
        (proto as any)._offsets = new Uint32Array([0]);
        (proto as any)._nullCount = -1;
        (proto as any)[Symbol.isConcatSpreadable] = true;
        (proto as any)['isValid'] = wrapChunkedCall1(isChunkedValid);
        (proto as any)['get'] = wrapChunkedCall1(getVisitor.getVisitFn(Type.Struct));
        (proto as any)['set'] = wrapChunkedCall2(setVisitor.getVisitFn(Type.Struct));
        (proto as any)['indexOf'] = wrapChunkedIndexOf(indexOfVisitor.getVisitFn(Type.Struct));
        (proto as any)['getByteLength'] = wrapChunkedCall1(byteLengthVisitor.getVisitFn(Type.Struct));
        return 'Table';
    })(Table.prototype);
}

/** Maps each column name of `T` to a Vector of that column's type. */
type VectorsMap<T extends TypeMap> = { [P in keyof T]: Vector<T[P]> };

/**
 * Creates a new Table from an object of typed arrays.
 *
 * @example
 * ```ts
 * const table = makeTable({
 *   a: new Int8Array([1, 2, 3]),
 * })
 * ```
 *
 * @param input Input an object of typed arrays.
 * @returns A new Table.
 */
export function makeTable<I extends Record<string | number | symbol, TypedArray>>(input: I): Table<{ [P in keyof I]: TypedArrayDataType<I[P]> }> {
    type T = { [P in keyof I]: TypedArrayDataType<I[P]> };
    const vecs = {} as VectorsMap<T>;
    const inputs = Object.entries(input) as [keyof I, I[keyof I]][];
    for (const [key, col] of inputs) {
        vecs[key] = makeVector(col);
    }
    return new Table<T>(vecs);
}

/**
 * Creates a new Table from an object of typed arrays or JavaScript arrays.
 *
 * @example
 * ```ts
 * const table = tableFromArrays({
 *   a: [1, 2, 3],
 *   b: new Int8Array([1, 2, 3]),
 * })
 * ```
 *
 * @param input Input an object of typed arrays or JavaScript arrays.
 * @returns A new Table.
 */
export function tableFromArrays<I extends Record<string | number | symbol, TypedArray | BigIntArray | readonly unknown[]>>(input: I): Table<{ [P in keyof I]: ArrayDataType<I[P]> }> {
    type T = { [P in keyof I]: ArrayDataType<I[P]> };
    const vecs = {} as VectorsMap<T>;
    const inputs = Object.entries(input) as [keyof I, I[keyof I]][];
    for (const [key, col] of inputs) {
        vecs[key] = vectorFromArray(col);
    }
    return new Table<T>(vecs);
}