"""Gradio demo that fits REaLTabFormer on an uploaded table and samples from it.

The user uploads a ``.csv`` or ``.arff`` file, picks how many rows to
generate, and receives a table of synthetic samples.
"""

from pathlib import Path

import gradio as gr
import pandas as pd
from realtabformer import REaLTabFormer
from scipy.io import arff

# File extensions that ``generate_data`` knows how to parse. The upload widget
# is restricted to the same set so users cannot pick a file that would fail.
SUPPORTED_SUFFIXES = (".csv", ".arff")

# NOTE(review): this single model instance is shared by every request and is
# re-fitted on each call to ``generate_data`` -- confirm that re-fitting an
# already fitted REaLTabFormer and concurrent requests are acceptable here.
rtf_model = REaLTabFormer(
    model_type="tabular",
    epochs=25,  # Default is 200
    gradient_accumulation_steps=4,
)


def _decode_bytes_columns(df):
    """Return *df* with ``bytes`` cells in object columns decoded to ``str``.

    ``scipy.io.arff.loadarff`` yields nominal attributes as ``bytes``; left
    untouched they would appear as ``b'...'`` in the training data and in the
    generated table.
    """
    for column in df.select_dtypes(include=[object]).columns:
        df[column] = df[column].map(
            lambda value: value.decode("utf-8")
            if isinstance(value, bytes)
            else value
        )
    return df


def _load_dataframe(path):
    """Read the table stored at *path* into a DataFrame.

    Raises:
        gr.Error: if the file extension is not one of ``SUPPORTED_SUFFIXES``.
    """
    suffix = Path(path).suffix.lower()
    if suffix == ".arff":
        # Context manager so the handle is closed even if parsing fails.
        with open(path, "rt") as handle:
            records, _meta = arff.loadarff(handle)
        return _decode_bytes_columns(pd.DataFrame(records))
    if suffix == ".csv":
        return pd.read_csv(path)
    raise gr.Error(
        f"Unsupported file type '{suffix}'. Please upload a .csv or .arff file."
    )


def generate_data(file, num_samples):
    """Fit the shared model on the uploaded table and sample synthetic rows.

    Args:
        file: The uploaded file, either as a path string or as an object
            exposing the path through a ``name`` attribute.
        num_samples: Number of synthetic rows to generate; coerced to ``int``.

    Returns:
        Whatever ``rtf_model.sample`` returns for the requested row count.

    Raises:
        gr.Error: if no file was provided or its type is not supported.
    """
    if file is None:
        raise gr.Error("Please upload a .csv or .arff file first.")

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    path = getattr(file, "name", file)
    df = _load_dataframe(path)

    rtf_model.fit(df, num_bootstrap=100)  # Default is 500

    # Generate synthetic data
    return rtf_model.sample(n_samples=int(num_samples))


# Custom styling for the page. Built from adjacent string literals so that
# each rule is readable; the resulting text is unchanged.
css = (
    " .gradio-container { font-family: 'IBM Plex Sans', sans-serif; }"
    " .gr-button { color: white; border-color: black; background: black; }"
    " input[type='range'] { accent-color: black; }"
    " .dark input[type='range'] { accent-color: #dfdfdf; }"
    " .container { max-width: 430px; margin: auto; padding-top: 1.5rem; }"
    " #gallery { min-height: 22rem; margin-bottom: 15px; margin-left: auto;"
    " margin-right: auto; border-bottom-right-radius: .5rem !important;"
    " border-bottom-left-radius: .5rem !important; }"
    " #gallery>div>.h-full { min-height: 20rem; }"
    " .details:hover { text-decoration: underline; }"
    " .gr-button { white-space: nowrap; }"
    " .gr-button:focus {"
    " border-color: rgb(147 197 253 / var(--tw-border-opacity));"
    " outline: none;"
    " box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow),"
    " var(--tw-shadow, 0 0 #0000);"
    " --tw-border-opacity: 1;"
    " --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0"
    " var(--tw-ring-offset-width) var(--tw-ring-offset-color);"
    " --tw-ring-shadow: var(--tw-ring-inset) 0 0 0"
    " calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color);"
    " --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));"
    " --tw-ring-opacity: .5; }"
    " #advanced-btn { font-size: .7rem !important; line-height: 19px;"
    " margin-top: 12px; margin-bottom: 12px; padding: 2px 8px;"
    " border-radius: 14px !important; }"
    " #advanced-options { display: none; margin-bottom: 20px; }"
    " .footer { margin-bottom: 45px; margin-top: 35px; text-align: center;"
    " border-bottom: 1px solid #e5e5e5; } \n"
    ".footer>p { font-size: .8rem; display: inline-block; padding: 0 10px;"
    " transform: translateY(10px); background: white; }"
    " .dark .footer { border-color: #303030; }"
    " .dark .footer>p { background: #0b0f19; } "
)

with gr.Blocks(css=css) as demo:
    gr.Markdown(
        " ## REaLTabFormer: Generating Realistic Relational and Tabular Data"
        " using Transformers "
    )
    gr.HTML(
        "\n\n"
        "This is an unofficial demo for REaLTabFormer that can be used to"
        " generate synthetic data from single tabular data using GPT."
        " The demo is based on the Github implementation provided by the"
        " authors."
        "\n\n"
    )

    with gr.Column():
        with gr.Tab("Upload Data as File"):
            data_input_u = gr.File(
                label='Upload Data File',
                # Only offer the formats generate_data can actually parse.
                file_types=list(SUPPORTED_SUFFIXES),
            )
            num_samples = gr.Slider(
                label="Number of Samples",
                minimum=5,
                maximum=100,
                value=5,
                step=10,
            )
            generate_data_btn = gr.Button('Generate Synthetic Data')

        with gr.Row():
            data_output = gr.Dataframe(label="Synthetic Data")

    generate_data_btn.click(
        generate_data,
        inputs=[data_input_u, num_samples],
        outputs=[data_output],
    )

    # Bundled example datasets; with cache_examples=True their outputs are
    # computed through ``generate_data`` and cached by Gradio.
    examples = gr.Examples(
        examples=[['diabetes.arff', 5], ["titanic.csv", 15]],
        inputs=[data_input_u, num_samples],
        outputs=[data_output],
        cache_examples=True,
        fn=generate_data,
    )

demo.launch()