RamAnanth1 committed on
Commit
0401543
1 Parent(s): 4e1e505

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -53
app.py CHANGED
@@ -22,53 +22,53 @@ def generate_data(file, num_samples):
22
 
23
  return samples
24
 
25
- def generate_relational_data(parent_file, child_file, join_on):
26
- parent_df = pd.read_csv(parent_file.name)
27
- child_df = pd.read_csv(child_file.name)
28
-
29
- #Make sure join_on column exists in both
30
- assert ((join_on in parent_df.columns) and
31
- (join_on in child_df.columns))
32
-
33
- rtf_model.fit(parent_df.drop(join_on, axis=1), num_bootstrap=100)
34
-
35
- pdir = Path("rtf_parent/")
36
- rtf_model.save(pdir)
37
-
38
- # # Get the most recently saved parent model,
39
- # # or a specify some other saved model.
40
- # parent_model_path = pdir / "idXXX"
41
- parent_model_path = sorted([
42
- p for p in pdir.glob("id*") if p.is_dir()],
43
- key=os.path.getmtime)[-1]
44
-
45
- child_model = REaLTabFormer(
46
- model_type="relational",
47
- parent_realtabformer_path=parent_model_path,
48
- epochs = 25,
49
- output_max_length=None,
50
- train_size=0.8)
51
-
52
- child_model.fit(
53
- df=child_df,
54
- in_df=parent_df,
55
- join_on=join_on,
56
- num_bootstrap=10)
57
-
58
- # Generate parent samples.
59
- parent_samples = rtf_model.sample(5)
60
-
61
- # Create the unique ids based on the index.
62
- parent_samples.index.name = join_on
63
- parent_samples = parent_samples.reset_index()
64
-
65
- # Generate the relational observations.
66
- child_samples = child_model.sample(
67
- input_unique_ids=parent_samples[join_on],
68
- input_df=parent_samples.drop(join_on, axis=1),
69
- gen_batch=5)
70
-
71
- return parent_samples, child_samples, gr.update(visible = True)
72
 
73
 
74
 
@@ -172,12 +172,12 @@ with gr.Blocks(css = css) as demo:
172
  num_samples = gr.Slider(label="Number of Samples", minimum=5, maximum=100, value=5, step=10)
173
  generate_data_btn = gr.Button('Generate Synthetic Data')
174
 
175
- with gr.Tab("Upload Data as File: Relational Data"):
176
- data_input_parent = gr.File(label = 'Upload Data File for Parent Dataset', file_types=[ ".csv"])
177
- data_input_child = gr.File(label = 'Upload Data File for Child Dataset', file_types=[ ".csv"])
178
- join_on = gr.Textbox(label = 'Column name to join on')
179
 
180
- generate_data_btn_relational = gr.Button('Generate Synthetic Data')
181
 
182
  with gr.Row():
183
  #data_sample = gr.Dataframe(label = "Original Data")
@@ -187,7 +187,7 @@ with gr.Blocks(css = css) as demo:
187
 
188
 
189
  generate_data_btn.click(generate_data, inputs = [data_input_u,num_samples], outputs = [data_output])
190
- generate_data_btn_relational.click(generate_relational_data, inputs = [data_input_parent,data_input_child,join_on], outputs = [data_output, data_output_child])
191
  examples = gr.Examples(examples=[['diabetes.arff',5], ["titanic.csv", 15]],inputs = [data_input_u,num_samples], outputs = [data_output], cache_examples = True, fn = generate_data)
192
 
193
 
 
22
 
23
  return samples
24
 
25
+ # def generate_relational_data(parent_file, child_file, join_on):
26
+ # parent_df = pd.read_csv(parent_file.name)
27
+ # child_df = pd.read_csv(child_file.name)
28
+
29
+ # #Make sure join_on column exists in both
30
+ # assert ((join_on in parent_df.columns) and
31
+ # (join_on in child_df.columns))
32
+
33
+ # rtf_model.fit(parent_df.drop(join_on, axis=1), num_bootstrap=100)
34
+
35
+ # pdir = Path("rtf_parent/")
36
+ # rtf_model.save(pdir)
37
+
38
+ # # # Get the most recently saved parent model,
39
+ # # # or a specify some other saved model.
40
+ # # parent_model_path = pdir / "idXXX"
41
+ # parent_model_path = sorted([
42
+ # p for p in pdir.glob("id*") if p.is_dir()],
43
+ # key=os.path.getmtime)[-1]
44
+
45
+ # child_model = REaLTabFormer(
46
+ # model_type="relational",
47
+ # parent_realtabformer_path=parent_model_path,
48
+ # epochs = 25,
49
+ # output_max_length=None,
50
+ # train_size=0.8)
51
+
52
+ # child_model.fit(
53
+ # df=child_df,
54
+ # in_df=parent_df,
55
+ # join_on=join_on,
56
+ # num_bootstrap=10)
57
+
58
+ # # Generate parent samples.
59
+ # parent_samples = rtf_model.sample(5)
60
+
61
+ # # Create the unique ids based on the index.
62
+ # parent_samples.index.name = join_on
63
+ # parent_samples = parent_samples.reset_index()
64
+
65
+ # # Generate the relational observations.
66
+ # child_samples = child_model.sample(
67
+ # input_unique_ids=parent_samples[join_on],
68
+ # input_df=parent_samples.drop(join_on, axis=1),
69
+ # gen_batch=5)
70
+
71
+ # return parent_samples, child_samples, gr.update(visible = True)
72
 
73
 
74
 
 
172
  num_samples = gr.Slider(label="Number of Samples", minimum=5, maximum=100, value=5, step=10)
173
  generate_data_btn = gr.Button('Generate Synthetic Data')
174
 
175
+ # with gr.Tab("Upload Data as File: Relational Data"):
176
+ # data_input_parent = gr.File(label = 'Upload Data File for Parent Dataset', file_types=[ ".csv"])
177
+ # data_input_child = gr.File(label = 'Upload Data File for Child Dataset', file_types=[ ".csv"])
178
+ # join_on = gr.Textbox(label = 'Column name to join on')
179
 
180
+ # generate_data_btn_relational = gr.Button('Generate Synthetic Data')
181
 
182
  with gr.Row():
183
  #data_sample = gr.Dataframe(label = "Original Data")
 
187
 
188
 
189
  generate_data_btn.click(generate_data, inputs = [data_input_u,num_samples], outputs = [data_output])
190
+ #generate_data_btn_relational.click(generate_relational_data, inputs = [data_input_parent,data_input_child,join_on], outputs = [data_output, data_output_child])
191
  examples = gr.Examples(examples=[['diabetes.arff',5], ["titanic.csv", 15]],inputs = [data_input_u,num_samples], outputs = [data_output], cache_examples = True, fn = generate_data)
192
 
193