RamAnanth1 committed on
Commit
8eae5c4
1 Parent(s): 0401543

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -57
app.py CHANGED
@@ -22,53 +22,53 @@ def generate_data(file, num_samples):
22
 
23
  return samples
24
 
25
- # def generate_relational_data(parent_file, child_file, join_on):
26
- # parent_df = pd.read_csv(parent_file.name)
27
- # child_df = pd.read_csv(child_file.name)
28
-
29
- # #Make sure join_on column exists in both
30
- # assert ((join_on in parent_df.columns) and
31
- # (join_on in child_df.columns))
32
-
33
- # rtf_model.fit(parent_df.drop(join_on, axis=1), num_bootstrap=100)
34
-
35
- # pdir = Path("rtf_parent/")
36
- # rtf_model.save(pdir)
37
-
38
- # # # Get the most recently saved parent model,
39
- # # # or specify some other saved model.
40
- # # parent_model_path = pdir / "idXXX"
41
- # parent_model_path = sorted([
42
- # p for p in pdir.glob("id*") if p.is_dir()],
43
- # key=os.path.getmtime)[-1]
44
-
45
- # child_model = REaLTabFormer(
46
- # model_type="relational",
47
- # parent_realtabformer_path=parent_model_path,
48
- # epochs = 25,
49
- # output_max_length=None,
50
- # train_size=0.8)
51
-
52
- # child_model.fit(
53
- # df=child_df,
54
- # in_df=parent_df,
55
- # join_on=join_on,
56
- # num_bootstrap=10)
57
-
58
- # # Generate parent samples.
59
- # parent_samples = rtf_model.sample(5)
60
-
61
- # # Create the unique ids based on the index.
62
- # parent_samples.index.name = join_on
63
- # parent_samples = parent_samples.reset_index()
64
-
65
- # # Generate the relational observations.
66
- # child_samples = child_model.sample(
67
- # input_unique_ids=parent_samples[join_on],
68
- # input_df=parent_samples.drop(join_on, axis=1),
69
- # gen_batch=5)
70
-
71
- # return parent_samples, child_samples, gr.update(visible = True)
72
 
73
 
74
 
@@ -162,22 +162,18 @@ with gr.Blocks(css = css) as demo:
162
  ''')
163
 
164
  with gr.Column():
165
- #gr.Markdown(""" ### Record audio """)
166
- # with gr.Tab("Record Audio"):
167
- # audio_input_r = gr.Audio(label = 'Record Audio Input',source="microphone",type="filepath")
168
- # transcribe_audio_r = gr.Button('Transcribe')
169
 
170
  with gr.Tab("Upload Data as File: Tabular Data"):
171
  data_input_u = gr.File(label = 'Upload Data File (Currently supports CSV and ARFF)', file_types=[".csv", ".arff"])
172
  num_samples = gr.Slider(label="Number of Samples", minimum=5, maximum=100, value=5, step=10)
173
  generate_data_btn = gr.Button('Generate Synthetic Data')
174
 
175
- # with gr.Tab("Upload Data as File: Relational Data"):
176
- # data_input_parent = gr.File(label = 'Upload Data File for Parent Dataset', file_types=[ ".csv"])
177
- # data_input_child = gr.File(label = 'Upload Data File for Child Dataset', file_types=[ ".csv"])
178
- # join_on = gr.Textbox(label = 'Column name to join on')
179
 
180
- # generate_data_btn_relational = gr.Button('Generate Synthetic Data')
181
 
182
  with gr.Row():
183
  #data_sample = gr.Dataframe(label = "Original Data")
@@ -187,7 +183,7 @@ with gr.Blocks(css = css) as demo:
187
 
188
 
189
  generate_data_btn.click(generate_data, inputs = [data_input_u,num_samples], outputs = [data_output])
190
- #generate_data_btn_relational.click(generate_relational_data, inputs = [data_input_parent,data_input_child,join_on], outputs = [data_output, data_output_child])
191
  examples = gr.Examples(examples=[['diabetes.arff',5], ["titanic.csv", 15]],inputs = [data_input_u,num_samples], outputs = [data_output], cache_examples = True, fn = generate_data)
192
 
193
 
 
22
 
23
  return samples
24
 
25
+ def generate_relational_data(parent_file, child_file, join_on):
26
+ parent_df = pd.read_csv(parent_file.name)
27
+ child_df = pd.read_csv(child_file.name)
28
+
29
+ #Make sure join_on column exists in both
30
+ assert ((join_on in parent_df.columns) and
31
+ (join_on in child_df.columns))
32
+
33
+ rtf_model.fit(parent_df.drop(join_on, axis=1), num_bootstrap=100)
34
+
35
+ pdir = Path("rtf_parent/")
36
+ rtf_model.save(pdir)
37
+
38
+ # # Get the most recently saved parent model,
39
+ # # or specify some other saved model.
40
+ # parent_model_path = pdir / "idXXX"
41
+ parent_model_path = sorted([
42
+ p for p in pdir.glob("id*") if p.is_dir()],
43
+ key=os.path.getmtime)[-1]
44
+
45
+ child_model = REaLTabFormer(
46
+ model_type="relational",
47
+ parent_realtabformer_path=parent_model_path,
48
+ epochs = 25,
49
+ output_max_length=None,
50
+ train_size=0.8)
51
+
52
+ child_model.fit(
53
+ df=child_df,
54
+ in_df=parent_df,
55
+ join_on=join_on,
56
+ num_bootstrap=10)
57
+
58
+ # Generate parent samples.
59
+ parent_samples = rtf_model.sample(5)
60
+
61
+ # Create the unique ids based on the index.
62
+ parent_samples.index.name = join_on
63
+ parent_samples = parent_samples.reset_index()
64
+
65
+ # Generate the relational observations.
66
+ child_samples = child_model.sample(
67
+ input_unique_ids=parent_samples[join_on],
68
+ input_df=parent_samples.drop(join_on, axis=1),
69
+ gen_batch=5)
70
+
71
+ return parent_samples, child_samples, gr.update(visible = True)
72
 
73
 
74
 
 
162
  ''')
163
 
164
  with gr.Column():
 
 
 
 
165
 
166
  with gr.Tab("Upload Data as File: Tabular Data"):
167
  data_input_u = gr.File(label = 'Upload Data File (Currently supports CSV and ARFF)', file_types=[".csv", ".arff"])
168
  num_samples = gr.Slider(label="Number of Samples", minimum=5, maximum=100, value=5, step=10)
169
  generate_data_btn = gr.Button('Generate Synthetic Data')
170
 
171
+ with gr.Tab("Upload Data as File: Relational Data"):
172
+ data_input_parent = gr.File(label = 'Upload Data File for Parent Dataset', file_types=[ ".csv"])
173
+ data_input_child = gr.File(label = 'Upload Data File for Child Dataset', file_types=[ ".csv"])
174
+ join_on = gr.Textbox(label = 'Column name to join on')
175
 
176
+ generate_data_btn_relational = gr.Button('Generate Synthetic Data')
177
 
178
  with gr.Row():
179
  #data_sample = gr.Dataframe(label = "Original Data")
 
183
 
184
 
185
  generate_data_btn.click(generate_data, inputs = [data_input_u,num_samples], outputs = [data_output])
186
+ generate_data_btn_relational.click(generate_relational_data, inputs = [data_input_parent,data_input_child,join_on], outputs = [data_output, data_output_child])
187
  examples = gr.Examples(examples=[['diabetes.arff',5], ["titanic.csv", 15]],inputs = [data_input_u,num_samples], outputs = [data_output], cache_examples = True, fn = generate_data)
188
 
189