File size: 1,007 Bytes
2d8da09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# Coding scheme for each table column: one list entry per column, giving the
# column name, its code type, and the arguments for that code type.
table_structure:
  - name: col_a
    code_type: float
    args:
      # number of tokens used to code the column
      code_len: 4
      # the positional base number, i.e. it uses 16 tokens for one digit
      base: 16
      # whether to use the full base number for each token or derive it from
      # the data
      fillall: false
      # whether it can handle NaN or not
      hasnan: false
      # can be one of ['yeo-johnson', 'quantile', 'robust'], see
      # https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
      transform: yeo-johnson
  - name: col_b
    code_type: float
    args:
      code_len: 4
      base: 32
      fillall: true
      hasnan: true
      transform: quantile
  - name: col_c
    code_type: int
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
  - name: col_d
    code_type: category
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
tokenizer_file: ??? # tabular tokenizer output file path
table_csv_file: ??? # input table CSV file
|