ncoop57 committed on
Commit
a2dd03e
1 Parent(s): e0be252

Add additional checks

Browse files
Files changed (2) hide show
  1. app.py +42 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -42,9 +42,12 @@ for name in dataset_names:
42
  )
43
  dataset_data[name] = {
44
  "ds": ds,
45
- "word_rep_ratios": np.random.randn(len(ds)),
46
  "check_char_repetition_criteria": np.array(ds["check_char_repetition_criteria"]),
47
  "check_flagged_words_criteria": np.array(ds["check_flagged_words_criteria"]),
 
 
 
48
  }
49
 
50
  def plt_plot(criteria, dataset, threshold):
@@ -83,6 +86,18 @@ def check_filtered(criteria, dataset, threshold):
83
  with gr.Blocks() as demo:
84
  dataset = gr.Radio(dataset_names, label="Dataset", value="arXiv")
85
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  with gr.Tab("Character Repetition Criteria"):
87
  # plot some random data
88
  plot = gr.Plot()
@@ -95,18 +110,18 @@ with gr.Blocks() as demo:
95
  check_fn = partial(check_filtered, "check_char_repetition_criteria")
96
  check.click(check_fn, [dataset, threshold], filtered_data)
97
 
98
- with gr.Tab("Word Repetition Criteria"):# plot some random data
99
  plot = gr.Plot()
100
  threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
101
  calculate = gr.Button("Calculate")
102
  check = gr.Button("Check Filtered Data")
103
  filtered_data = gr.Textbox(lines=5, label="Filtered Data")
104
- plot_fn = partial(plt_plot, "word_rep_ratios")
105
  calculate.click(plot_fn, [dataset, threshold], plot)
106
- check_fn = partial(check_filtered, "word_rep_ratios")
107
  check.click(check_fn, [dataset, threshold], filtered_data)
108
 
109
- with gr.Tab("Flagged Word Criteria"):# plot some random data
110
  plot = gr.Plot()
111
  threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
112
  calculate = gr.Button("Calculate")
@@ -117,5 +132,27 @@ with gr.Blocks() as demo:
117
  check_fn = partial(check_filtered, "check_flagged_words_criteria")
118
  check.click(check_fn, [dataset, threshold], filtered_data)
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  if __name__ == "__main__":
121
  demo.launch()
 
42
  )
43
  dataset_data[name] = {
44
  "ds": ds,
45
+ "check_word_number_criteria": np.array(ds["check_word_number_criteria"]),
46
  "check_char_repetition_criteria": np.array(ds["check_char_repetition_criteria"]),
47
  "check_flagged_words_criteria": np.array(ds["check_flagged_words_criteria"]),
48
+ "check_stop_word_ratio_criteria": np.array(ds["check_stop_word_ratio_criteria"]),
49
+ "check_perplexity_criteria": np.array(ds["check_perplexity_criteria"]),
50
+ "check_language_criteria": np.array(ds["check_language_criteria"]),
51
  }
52
 
53
  def plt_plot(criteria, dataset, threshold):
 
86
  with gr.Blocks() as demo:
87
  dataset = gr.Radio(dataset_names, label="Dataset", value="arXiv")
88
 
89
+ with gr.Tab("Number of Words Criteria"):
90
+ # plot some random data
91
+ plot = gr.Plot()
92
+ threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
93
+ calculate = gr.Button("Calculate")
94
+ check = gr.Button("Check Filtered Data")
95
+ filtered_data = gr.Textbox(lines=5, label="Filtered Data")
96
+ plot_fn = partial(plt_plot, "check_word_number_criteria")
97
+ calculate.click(plot_fn, [dataset, threshold], plot)
98
+ check_fn = partial(check_filtered, "check_word_number_criteria")
99
+ check.click(check_fn, [dataset, threshold], filtered_data)
100
+
101
  with gr.Tab("Character Repetition Criteria"):
102
  # plot some random data
103
  plot = gr.Plot()
 
110
  check_fn = partial(check_filtered, "check_char_repetition_criteria")
111
  check.click(check_fn, [dataset, threshold], filtered_data)
112
 
113
+ with gr.Tab("Stop Word Ratio Criteria"):
114
  plot = gr.Plot()
115
  threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
116
  calculate = gr.Button("Calculate")
117
  check = gr.Button("Check Filtered Data")
118
  filtered_data = gr.Textbox(lines=5, label="Filtered Data")
119
+ plot_fn = partial(plt_plot, "check_stop_word_ratio_criteria")
120
  calculate.click(plot_fn, [dataset, threshold], plot)
121
+ check_fn = partial(check_filtered, "check_stop_word_ratio_criteria")
122
  check.click(check_fn, [dataset, threshold], filtered_data)
123
 
124
+ with gr.Tab("Flagged Word Criteria"):
125
  plot = gr.Plot()
126
  threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
127
  calculate = gr.Button("Calculate")
 
132
  check_fn = partial(check_filtered, "check_flagged_words_criteria")
133
  check.click(check_fn, [dataset, threshold], filtered_data)
134
 
135
+ with gr.Tab("Perplexity Criteria"):
136
+ plot = gr.Plot()
137
+ threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
138
+ calculate = gr.Button("Calculate")
139
+ check = gr.Button("Check Filtered Data")
140
+ filtered_data = gr.Textbox(lines=5, label="Filtered Data")
141
+ plot_fn = partial(plt_plot, "check_perplexity_criteria")
142
+ calculate.click(plot_fn, [dataset, threshold], plot)
143
+ check_fn = partial(check_filtered, "check_perplexity_criteria")
144
+ check.click(check_fn, [dataset, threshold], filtered_data)
145
+
146
+ with gr.Tab("Language Detection Criteria"):
147
+ plot = gr.Plot()
148
+ threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
149
+ calculate = gr.Button("Calculate")
150
+ check = gr.Button("Check Filtered Data")
151
+ filtered_data = gr.Textbox(lines=5, label="Filtered Data")
152
+ plot_fn = partial(plt_plot, "check_language_criteria")
153
+ calculate.click(plot_fn, [dataset, threshold], plot)
154
+ check_fn = partial(check_filtered, "check_language_criteria")
155
+ check.click(check_fn, [dataset, threshold], filtered_data)
156
+
157
  if __name__ == "__main__":
158
  demo.launch()
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
  scrubadub
2
- squeakily
 
1
  scrubadub
2
+ git+https://github.com/CarperAI/squeakily.git