Edit model card

You need to agree to share your contact information to access this model

This repository is publicly accessible, but you have to accept the conditions to access its files and content.

Log in or Sign Up to review the conditions and access this model content.

label_encoder_map

# Label-encode the categorical pill-appearance columns and fit a KNN classifier
# that maps the encoded features to the item name.
#
# Assumes `ds` (a `datasets.Dataset`), `LabelEncoder` and
# `KNeighborsClassifier` are already in scope where this snippet runs.
#
# NOTE(review): the column names below were mojibake in the published snippet
# (UTF-8 bytes shown through a single-byte code page) and have been restored to
# the Korean text those bytes decode to -- confirm against the dataset schema.
feature_columns = [
    "의약품제형",  # roughly: dosage form
    "색상앞",  # roughly: colour (front)
    "색상뒤",  # roughly: colour (back)
    "분할선앞",  # roughly: score line (front)
    "분할선뒤",  # roughly: score line (back)
    "제형코드명",  # roughly: form code name
]

# One independent encoder per column, keyed by the source column name.
label_encoder_map = {column: LabelEncoder() for column in feature_columns}

# Materialise the pandas view once; the source columns are not modified by
# `add_column`, so there is no need to convert again for every feature.
frame = ds.to_pandas()
for column in feature_columns:
    encoded = label_encoder_map[column].fit_transform(frame[column])
    ds = ds.add_column(f"{column}_encoded", encoded)

encoded_columns = [f"{column}_encoded" for column in feature_columns]

knn = KNeighborsClassifier(n_neighbors=5, metric='cosine')
knn.fit(
    ds.select_columns(encoded_columns).to_pandas(),
    # Hand the target over as a 1-D Series: a single-column DataFrame is
    # accepted but makes scikit-learn warn and flatten it internally.
    ds.select_columns("품목명").to_pandas()["품목명"],
)

Full code

Condensed-Co-Graph-And-Size-Graph

from datasets import load_dataset, disable_caching, Value
import numpy as np
from sklearn.preprocessing import LabelEncoder

def _unique_labels(*columns):
    """Return the sorted unique values found across all *columns*.

    Each column is only iterated, so it may be a plain list or any other
    iterable column object. The values are funnelled through one NumPy array,
    as in the original code, so they share a single dtype before
    de-duplication.
    """
    merged = [value for column in columns for value in column]
    return np.unique(np.asarray(merged))


def _load_graph_split(config_name):
    """Load the 'train' split of one configuration of the pill graph repo."""
    return load_dataset('brainer/pill_identification_graph', config_name)['train']


# Node and edge tables of the three graphs published in the repository.
co_graph_edges = _load_graph_split('co-graph-edges')
co_graph_nodes = _load_graph_split('co-graph-nodes')
size_graph_edges = _load_graph_split('size-graph-edges')
size_graph_nodes = _load_graph_split('size-graph-nodes')
pill_ingredients_edges = _load_graph_split('merge-hira-pill_identification-edges')
pill_ingredients_nodes = _load_graph_split('merge-hira-pill_identification-nodes')
pill_identification_data = load_dataset('brainer/pill_identification_data', 'default')

drug_name_encoder = LabelEncoder()
gnl_nm_encoder = LabelEncoder()

# Cast the serial-number column to string before it is merged with the graph
# identifiers below.
# NOTE(review): the column name was mojibake in the published snippet and has
# been restored to the Korean text its bytes decode to (roughly "item serial
# number") -- confirm against the dataset schema.
item_serial_number = pill_identification_data.cast_column(
    '품목일련번호', Value(dtype='string')
)['train']['품목일련번호']

# Fit one encoder over every identifier that can appear on the drug side of
# the graphs, and one over the identifiers of the co-graph / ingredient nodes.
drug_name_encoder.fit(
    _unique_labels(
        size_graph_nodes['id'],
        size_graph_edges['target'],
        pill_ingredients_edges['target'],
        item_serial_number,
    )
)

gnl_nm_encoder.fit(_unique_labels(co_graph_nodes['id'], pill_ingredients_nodes['id']))
Downloads last month
0