File size: 1,007 Bytes

2d8da09

# Column coding specification: one list entry per table column, each giving
# the column `name`, its `code_type`, and the `args` for that code type.
table_structure:
  - name: col_a
    code_type: float
    args:
      # number of tokens used to code the column
      code_len: 4
      # the positional base number, i.e. it uses 16 tokens for one digit
      base: 16
      # whether to use the full base number for each token or derive it
      # from the data
      fillall: false
      # whether the column can handle NaN values
      hasnan: false
      # one of ['yeo-johnson', 'quantile', 'robust']; see
      # https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
      transform: yeo-johnson
  # The remaining entries use the same `args` keys as described for col_a.
  - name: col_b
    code_type: float
    args:
      code_len: 4
      base: 32
      fillall: true
      hasnan: true
      transform: quantile
  # This entry specifies no `transform`.
  - name: col_c
    code_type: int
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
  # This entry specifies no `transform`.
  - name: col_d
    code_type: category
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
# File paths used by this config.
# NOTE(review): `???` looks like the OmegaConf/Hydra "mandatory value" marker,
# meaning both paths must be supplied by the caller — confirm with the consumer.
tokenizer_file: ???  # tabular tokenizer output file path
table_csv_file: ???  # input table csv file