File size: 1,007 Bytes
2d8da09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Per-column coding specification. Each entry names a column, selects its
# code_type, and supplies the coder arguments under `args`.
table_structure:
  - name: col_a
    code_type: float
    args:
      # Number of tokens used to code the column.
      code_len: 4
      # Positional base, i.e. 16 tokens are used for one digit.
      base: 16
      # Whether to use the full base number for each token or derive it
      # from the data.
      fillall: false
      # Whether the column can handle NaN values.
      hasnan: false
      # One of 'yeo-johnson', 'quantile', 'robust'; see
      # https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
      transform: yeo-johnson
  - name: col_b
    code_type: float
    args:
      code_len: 4
      base: 32
      fillall: true
      hasnan: true
      transform: quantile
  - name: col_c
    code_type: int
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
  - name: col_d
    code_type: category
    args:
      code_len: 3
      base: 12
      fillall: true
      hasnan: true
# NOTE(review): `???` looks like a required-value placeholder that must be
# overridden before use - confirm against the config loader.
tokenizer_file: ???  # tabular tokenizer output file path
table_csv_file: ???  # input table csv file