ncoop57 committed on
Commit
2c09868
1 Parent(s): 37ed6eb

Add filtering lesser as well as greater

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -50,11 +50,11 @@ for name in dataset_names:
50
  "check_compression_ratio_criteria": np.array(ds["check_compression_ratio_criteria"]),
51
  }
52
 
53
- def plt_plot(criteria, dataset, threshold):
54
  plt.close("all")
55
  x = dataset_data[dataset][criteria]
56
  # calculate percentage of data that will be removed given threshold
57
- perc = np.sum(x > threshold) / len(x)
58
  # create a figure
59
  fig = plt.figure()
60
  # add a subplot
@@ -72,10 +72,12 @@ def plt_plot(criteria, dataset, threshold):
72
  plt.tight_layout()
73
  return fig
74
 
75
- def check_filtered(criteria, dataset, threshold):
76
  ds = dataset_data[dataset]["ds"]
77
 
78
- filtered_ds = ds.filter(lambda x: x[criteria] > threshold)
 
 
79
  if len(filtered_ds) == 0:
80
  return "No examples found"
81
  # get random sample of 1
@@ -161,9 +163,17 @@ with gr.Blocks() as demo:
161
  calculate = gr.Button("Calculate")
162
  check = gr.Button("Check Filtered Data")
163
  filtered_data = gr.Textbox(lines=5, label="Filtered Data")
164
- plot_fn = partial(plt_plot, "check_compression_ratio_criteria")
 
 
 
 
165
  calculate.click(plot_fn, [dataset, threshold], plot)
166
- check_fn = partial(check_filtered, "check_compression_ratio_criteria")
 
 
 
 
167
  check.click(check_fn, [dataset, threshold], filtered_data)
168
 
169
  if __name__ == "__main__":
 
50
  "check_compression_ratio_criteria": np.array(ds["check_compression_ratio_criteria"]),
51
  }
52
 
53
+ def plt_plot(criteria, dataset, threshold, greater_than=True):
54
  plt.close("all")
55
  x = dataset_data[dataset][criteria]
56
  # calculate percentage of data that will be removed given threshold
57
+ perc = np.sum(x > threshold if greater_than else x < threshold) / len(x)
58
  # create a figure
59
  fig = plt.figure()
60
  # add a subplot
 
72
  plt.tight_layout()
73
  return fig
74
 
75
+ def check_filtered(criteria, dataset, threshold, greater_than=True):
76
  ds = dataset_data[dataset]["ds"]
77
 
78
+ filtered_ds = ds.filter(
79
+ lambda x: x[criteria] > threshold if greater_than else x[criteria] < threshold
80
+ )
81
  if len(filtered_ds) == 0:
82
  return "No examples found"
83
  # get random sample of 1
 
163
  calculate = gr.Button("Calculate")
164
  check = gr.Button("Check Filtered Data")
165
  filtered_data = gr.Textbox(lines=5, label="Filtered Data")
166
+ plot_fn = partial(
167
+ plt_plot,
168
+ "check_compression_ratio_criteria",
169
+ greater_than=False
170
+ )
171
  calculate.click(plot_fn, [dataset, threshold], plot)
172
+ check_fn = partial(
173
+ check_filtered,
174
+ "check_compression_ratio_criteria",
175
+ greater_than=False
176
+ )
177
  check.click(check_fn, [dataset, threshold], filtered_data)
178
 
179
  if __name__ == "__main__":