metadata
pipeline_tag: tabular-classification
tags:
- sklearn
language:
- ko
library_name: sklearn
label_encoder_map
# Label-encode the categorical pill attributes of `ds` and fit a k-NN
# classifier that predicts the item name from the encoded attributes.
#
# NOTE(review): the Korean column names below were restored from text that had
# been mis-decoded (UTF-8 bytes read as a single-byte code page, which also
# split some literals across lines). Confirm them against the dataset schema.
feature_columns = [
    "의약품제형",
    "색상앞",
    "색상뒤",
    "분할선앞",
    "분할선뒤",
    "제형코드명",
]
target_column = "품목명"

# One independent encoder per feature column, kept so the same mapping can be
# reused later (e.g. to encode query rows).
label_encoder_map = {column: LabelEncoder() for column in feature_columns}

# Convert to pandas once; add_column() below only appends new columns, so the
# source columns read from this frame stay valid for the whole loop.
source_frame = ds.to_pandas()
for column in feature_columns:
    encoded_values = label_encoder_map[column].fit_transform(source_frame[column])
    ds = ds.add_column(f"{column}_encoded", encoded_values)

encoded_columns = [f"{column}_encoded" for column in feature_columns]

knn = KNeighborsClassifier(n_neighbors=5, metric='cosine')
# The target is passed as a 1-D Series rather than a one-column DataFrame so
# that scikit-learn receives the 1-D label vector it expects.
knn.fit(
    ds.select_columns(encoded_columns).to_pandas(),
    source_frame[target_column],
)
Condensed-Co-Graph-And-Size-Graph
from datasets import load_dataset, disable_caching, Value
import numpy as np
from sklearn.preprocessing import LabelEncoder
# Load the `train` split of every node/edge table of the pill graph dataset.
graph_repo = 'brainer/pill_identification_graph'
co_graph_edges = load_dataset(graph_repo, 'co-graph-edges')['train']
co_graph_nodes = load_dataset(graph_repo, 'co-graph-nodes')['train']
size_graph_edges = load_dataset(graph_repo, 'size-graph-edges')['train']
size_graph_nodes = load_dataset(graph_repo, 'size-graph-nodes')['train']
pill_ingredients_edges = load_dataset(graph_repo, 'merge-hira-pill_identification-edges')['train']
pill_ingredients_nodes = load_dataset(graph_repo, 'merge-hira-pill_identification-nodes')['train']

# Notebook-style preview of the loaded tables; has no effect when run as a script.
co_graph_nodes, co_graph_edges, size_graph_nodes, size_graph_edges, pill_ingredients_nodes, pill_ingredients_edges

pill_identification_data = load_dataset('brainer/pill_identification_data', 'default')
drug_name_encoder = LabelEncoder()
gnl_nm_encoder = LabelEncoder()

# NOTE(review): the column name was restored from mis-decoded UTF-8 text —
# confirm it against the dataset schema.
serial_number_column = '품목일련번호'
# Cast to string before pooling with the graph ids below
# (presumably the graph ids are strings as well — TODO confirm).
item_serial_number = pill_identification_data.cast_column(
    serial_number_column, Value(dtype='string')
)['train'][serial_number_column]

# Pool every id the drug encoder must know about. Unpacking is used instead of
# `+` so this works for any iterable column object, not only plain lists.
drug_ids = [
    *size_graph_nodes['id'],
    *size_graph_edges['target'],
    *pill_ingredients_edges['target'],
    *item_serial_number,
]
ingredient_ids = [
    *co_graph_nodes['id'],
    *pill_ingredients_nodes['id'],
]

# LabelEncoder.fit() reduces its input to the sorted unique values itself, so
# no manual set() de-duplication pass is needed beforehand.
drug_name_encoder.fit(np.asarray(drug_ids))
gnl_nm_encoder.fit(np.asarray(ingredient_ids))