import sys
from typing import Optional, Tuple

tabpfn_path = 'TabPFN'
sys.path.insert(0, tabpfn_path)  # our submodule of the TabPFN repo (at 045c8400203ebd062346970b4f2c0ccda5a40618)
from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier
import numpy as np
import pandas as pd
import torch
import gradio as gr
import openml


def compute(table: np.ndarray) -> Tuple[Optional[str], Optional[np.ndarray]]:
    """Predict the blank cells of one column from the fully filled rows.

    Rows that are blank in every cell are ignored. All remaining blank cells
    must lie in a single column; that column is used as the label column.
    Rows without a blank cell are used for fitting, rows with a blank label
    are the ones that get predicted.

    Args:
        table: 2-D table of cells. Cells are compared as text; the empty
            string marks a blank cell.

    Returns:
        A pair ``(message, out_table)``. On invalid input ``message`` is a
        Markdown error string and ``out_table`` is ``None``. On success
        ``message`` is ``None`` and ``out_table`` is a copy of the (row
        filtered) table in which every blank cell was replaced by
        ``"<prediction> (p=<winning probability>)"``.
    """
    # Normalise every cell to text so that blank detection also works when
    # cells arrive as non-string objects instead of crashing in len().
    table = np.asarray(table).astype(str)
    if table.ndim != 2 or table.size == 0:
        return "**Please provide a table with at least one row.**", None

    # Drop rows in which every cell is blank.
    non_empty_row_mask = np.char.str_len(table).sum(axis=1) != 0
    table = table[non_empty_row_mask]

    empty_mask = table == ''
    empty_rows, empty_cols = np.where(empty_mask)
    if not len(empty_rows):
        return "**Please leave at least one field blank for prediction.**", None
    if not np.all(empty_cols == empty_cols[0]):
        return "**Please only leave fields of one column blank for prediction.**", None

    y_column = empty_cols[0]
    eval_lines = empty_rows

    train_table = np.delete(table, eval_lines, axis=0)
    eval_table = table[eval_lines]
    if not len(train_table):
        # Every row has a blank label, so there is nothing to fit on.
        return "**Please fill in the label of at least one row, so there is data to learn from.**", None

    try:
        x_train = torch.tensor(np.delete(train_table, y_column, axis=1).astype(np.float32))
        x_eval = torch.tensor(np.delete(eval_table, y_column, axis=1).astype(np.float32))
    except ValueError:
        return "**Please only add numbers (to the inputs) or leave fields empty.**", None
    y_train = train_table[:, y_column]

    # A fresh classifier per call: it holds the fitted data, so it is not
    # shared between requests.
    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
    classifier.fit(x_train, y_train)
    y_eval, p_eval = classifier.predict(x_eval, return_winning_probability=True)

    # Use an object-dtype copy: a fixed-width unicode array would silently
    # truncate the longer "<label> (p=...)" strings on assignment.
    out_table = table.astype(object)
    out_table[eval_lines, y_column] = [f"{y_e} (p={p_e:.2f})" for y_e, p_e in zip(y_eval, p_eval)]
    return None, out_table


def upload_file(file):
    """Load an uploaded ``.arff``, ``.csv`` or ``.data`` file into a DataFrame.

    Args:
        file: Uploaded file object whose ``name`` attribute is the path of the
            file on disk, or ``None`` when no file is present.

    Returns:
        A ``pandas.DataFrame`` with the file contents. ``.csv``/``.data``
        files are read without a header row and get integer column names;
        ``.arff`` files use the attribute names reported by openml. When
        ``file`` is ``None`` a no-op ``gr.update()`` is returned so the target
        component keeps its current value.

    Raises:
        ValueError: If the file extension is not supported.
    """
    if file is None:
        # Presumably reached when the upload widget is cleared; leave the
        # table as it is instead of failing on ``file.name``.
        return gr.update()

    path = file.name
    lowered = path.lower()  # match extensions case-insensitively

    if lowered.endswith('.arff'):
        dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=path)
        X_, _, _, attribute_names_ = dataset.get_data(dataset_format="array")
        return pd.DataFrame(X_, columns=attribute_names_)

    if lowered.endswith(('.csv', '.data')):
        df = pd.read_csv(path, header=None)
        df.columns = np.arange(len(df.columns))
        return df

    raise ValueError(
        f"Unsupported file type: {path!r}. Please provide a .csv, .data or .arff file."
    )
# Toy table shown when the demo starts: two feature columns and a label
# column (label 1 when the features sum up to 3, label 2 above that).
# The last row leaves the label blank so it can be predicted.
example = [
    [1, 2, 1],
    [2, 1, 1],
    [1, 1, 1],
    [2, 2, 2],
    [3, 4, 2],
    [3, 2, 2],
    [2, 3, ''],
]

with gr.Blocks() as demo:
    # Introductory text rendered above the table.
    gr.Markdown(
        "This demo allows you to play with the **TabPFN**. "
        "You can either change the table manually (we have filled it with a toy benchmark, "
        "sum up to 3 has label 1 and over that label 2). "
        "The network predicts fields you leave empty. "
        "Only one column can have empty entries that are predicted. "
        "Please, provide everything but the label column as numeric values. "
        "It is ok to encode classes as integers. "
    )

    # Editable input table, pre-filled with the toy example.
    inp_table = gr.DataFrame(
        type='numpy',
        value=example,
        headers=['', '', ''],
    )

    # File upload that replaces the table contents via upload_file.
    inp_file = gr.File(
        label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.',
    )
    examples = gr.Examples(
        examples=['iris.csv', 'balance-scale.arff'],
        inputs=[inp_file],
        outputs=[inp_table],
        fn=upload_file,
        cache_examples=True,
    )

    btn = gr.Button("Predict Empty Table Cells")

    inp_file.change(fn=upload_file, inputs=inp_file, outputs=inp_table)

    # Outputs of compute: an optional Markdown message and the completed table.
    out_text = gr.Markdown()
    out_table = gr.DataFrame()

    btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table])

demo.launch()