# Spaces: Runtime error  (page-status banner captured along with the source; not part of the program)
import sys | |
tabpfn_path = 'TabPFN' | |
sys.path.insert(0, tabpfn_path) # our submodule of the TabPFN repo (at 045c8400203ebd062346970b4f2c0ccda5a40618) | |
from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier | |
import numpy as np | |
import pandas as pd | |
import torch | |
import gradio as gr | |
import openml | |
def compute(table: np.ndarray):
    """Predict the blank cells of one column of ``table`` with TabPFN.

    Rows whose cells are all empty are dropped first. Of the remaining rows,
    those containing a blank cell (``''``) are the rows to predict and all
    others are used as training data. Every blank must lie in the same
    column; that column is treated as the label column, and all other
    columns must be convertible to ``float32``.

    Args:
        table: 2-D array of cells. ``len()`` is taken on every cell, so cells
            are expected to be strings, with ``''`` marking an empty cell.

    Returns:
        A ``(message, out_table)`` pair. When the input cannot be used,
        ``message`` is a Markdown-formatted hint and ``out_table`` is
        ``None``. On success ``message`` is ``None`` and ``out_table`` is a
        copy of the non-empty rows in which each blank cell is replaced by
        ``"<predicted label> (p=<winning probability>)"``.
    """
    # ``otypes`` is required for np.vectorize to accept a table with zero
    # rows; without it NumPy raises ValueError on size-0 input.
    cell_lengths = np.vectorize(len, otypes=[int])(table)
    table = table[cell_lengths.sum(1) != 0]

    empty_rows, empty_cols = np.where(table == '')
    if not len(empty_rows):
        return "**Please leave at least one field blank for prediction.**", None
    y_column = empty_cols[0]
    if not np.all(empty_cols == y_column):
        return "**Please only leave fields of one column blank for prediction.**", None

    eval_lines = empty_rows
    train_table = np.delete(table, eval_lines, axis=0)
    if not len(train_table):
        # Every remaining row has a blank label, so there is nothing to fit on.
        return "**Please provide at least one completely filled row as training data.**", None
    eval_table = table[eval_lines]

    try:
        x_train = torch.tensor(np.delete(train_table, y_column, axis=1).astype(np.float32))
        x_eval = torch.tensor(np.delete(eval_table, y_column, axis=1).astype(np.float32))
    except ValueError:
        return "**Please only add numbers (to the inputs) or leave fields empty.**", None
    y_train = train_table[:, y_column]

    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
    classifier.fit(x_train, y_train)
    y_eval, p_eval = classifier.predict(x_eval, return_winning_probability=True)

    # Stringify as before, then switch to object dtype: a fixed-width unicode
    # array would silently truncate the longer "<label> (p=...)" strings to
    # the width of the widest existing cell.
    out_table = table.astype(str).astype(object)
    out_table[eval_lines, y_column] = [f"{y_e} (p={p_e:.2f})" for y_e, p_e in zip(y_eval, p_eval)]
    return None, out_table
def upload_file(file):
    """Load an uploaded ``.arff``, ``.csv`` or ``.data`` file into a DataFrame.

    Args:
        file: Object whose ``name`` attribute is the path of the uploaded
            file, or ``None`` when there is no file.

    Returns:
        A ``pandas.DataFrame`` with the file's contents, or ``None`` when no
        file was given or the extension is not one of the supported ones.
        ``.arff`` files keep their attribute names as column labels;
        ``.csv``/``.data`` files are read without a header row and get
        integer column labels.
    """
    if file is None:
        # Nothing to load; behave like the unsupported-extension case
        # instead of failing on ``None.name``.
        return None

    path = file.name
    lowered = path.lower()  # match extensions case-insensitively

    if lowered.endswith('.arff'):
        dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=path)
        features, _, _, attribute_names = dataset.get_data(dataset_format="array")
        return pd.DataFrame(features, columns=attribute_names)

    if lowered.endswith(('.csv', '.data')):
        df = pd.read_csv(path, header=None)
        df.columns = np.arange(len(df.columns))
        return df

    return None
# Toy table pre-filled into the input grid: two feature columns followed by a
# label column. Feature sums up to 3 carry label 1, larger sums carry label 2;
# the last row's label is left blank so that it gets predicted.
example = [
    [1, 2, 1],
    [2, 1, 1],
    [1, 1, 1],
    [2, 2, 2],
    [3, 4, 2],
    [3, 2, 2],
    [2, 3, ''],
]
# Assemble the page: intro text, editable input table, file upload with two
# bundled example files, a predict button, and the output widgets.
with gr.Blocks() as demo:
    gr.Markdown("""This demo allows you to play with the **TabPFN**.
You can either change the table manually (we have filled it with a toy benchmark, sum up to 3 has label 1 and over that label 2).
The network predicts fields you leave empty. Only one column can have empty entries that are predicted.
Please, provide everything but the label column as numeric values. It is ok to encode classes as integers.
""")

    # Editable table handed to `compute` as a NumPy array.
    inp_table = gr.DataFrame(
        type='numpy',
        value=example,
        headers=[''] * 3,
    )

    # Uploading a file replaces the contents of the input table.
    inp_file = gr.File(
        label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.',
    )
    examples = gr.Examples(
        examples=['iris.csv', 'balance-scale.arff'],
        inputs=[inp_file],
        outputs=[inp_table],
        fn=upload_file,
        cache_examples=True,
    )

    btn = gr.Button("Predict Empty Table Cells")
    inp_file.change(fn=upload_file, inputs=inp_file, outputs=inp_table)

    # `compute` returns (message, table); show them in these two widgets.
    out_text = gr.Markdown()
    out_table = gr.DataFrame()
    btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table])

demo.launch()