pill-identifier / README.md
brainer's picture
Update README.md
dc6adb0
metadata
pipeline_tag: tabular-classification
tags:
  - sklearn
language:
  - ko
library_name: sklearn

label_encoder_map

# One LabelEncoder per categorical pill attribute. Each fitted encoder stays in
# the map under the name of the source column it was fitted on.
# NOTE(review): `ds` is assumed to be an already-loaded `datasets.Dataset`
# containing these columns — it is not defined in this snippet.
label_encoder_map = {
    "의약품제형": LabelEncoder(),  # dosage form
    "색상앞": LabelEncoder(),  # color (front)
    "색상뒤": LabelEncoder(),  # color (back)
    "분할선앞": LabelEncoder(),  # score line (front)
    "분할선뒤": LabelEncoder(),  # score line (back)
    "제형코드명": LabelEncoder(),  # dosage-form code name
}

# Convert to pandas once: only the original columns are read below, so the
# frame does not have to be rebuilt after every add_column call.
source_frame = ds.to_pandas()
for column_name, encoder in label_encoder_map.items():
    # Fit the encoder on the raw column and append the integer codes as
    # "<column>_encoded".
    ds = ds.add_column(f'{column_name}_encoded', encoder.fit_transform(source_frame[column_name]))

# Feature columns in the same order as the encoder map.
feature_columns = [f'{column_name}_encoded' for column_name in label_encoder_map]

knn = KNeighborsClassifier(n_neighbors=5, metric='cosine')
knn.fit(
    ds.select_columns(feature_columns).to_pandas(),
    # Pass the target ("품목명", item name) as a 1-D Series; a one-column
    # DataFrame makes scikit-learn emit a DataConversionWarning.
    ds.select_columns("품목명").to_pandas()["품목명"],
)

Full code

Condensed-Co-Graph-And-Size-Graph

from datasets import load_dataset, disable_caching, Value
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the 'train' split of every node/edge configuration published in the
# pill identification graph repository.
co_graph_edges = load_dataset('brainer/pill_identification_graph', 'co-graph-edges')['train']
co_graph_nodes = load_dataset('brainer/pill_identification_graph', 'co-graph-nodes')['train']
size_graph_edges = load_dataset('brainer/pill_identification_graph', 'size-graph-edges')['train']
size_graph_nodes = load_dataset('brainer/pill_identification_graph', 'size-graph-nodes')['train']
pill_ingredients_edges = load_dataset('brainer/pill_identification_graph', 'merge-hira-pill_identification-edges')['train']
pill_ingredients_nodes = load_dataset('brainer/pill_identification_graph', 'merge-hira-pill_identification-nodes')['train']
# Notebook-style display of the loaded datasets; this bare expression has no
# effect when the code is run as a script.
co_graph_nodes, co_graph_edges, size_graph_nodes, size_graph_edges, pill_ingredients_nodes, pill_ingredients_edges
# No split is selected here, so all splits are loaded; 'train' is picked below.
pill_identification_data = load_dataset('brainer/pill_identification_data', 'default')



drug_name_encoder = LabelEncoder()
gnl_nm_encoder = LabelEncoder()


# "품목일련번호" (item serial number) is cast to string before it is merged with
# the graph ids below.
# NOTE(review): presumably the graph ids are strings as well — confirm.
item_serial_number = pill_identification_data.cast_column('품목일련번호', Value(dtype='string'))['train']['품목일련번호']

# LabelEncoder.fit keeps only the unique values it is given, so the ids are
# concatenated and passed straight in without a manual set() de-duplication.
drug_name_encoder.fit(np.asarray(size_graph_nodes['id'] + size_graph_edges['target'] + pill_ingredients_edges['target'] + item_serial_number))

gnl_nm_encoder.fit(np.asarray(co_graph_nodes['id'] + pill_ingredients_nodes['id']))