polinaeterna HF staff committed on
Commit
73bc7cb
1 Parent(s): aa43a9d
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -3,6 +3,7 @@ from collections import Counter
3
  from requests.adapters import HTTPAdapter, Retry
4
  import multiprocessing
5
  import os
 
6
 
7
  import gradio as gr
8
  import pandas as pd
@@ -14,7 +15,7 @@ from huggingface_hub import PyTorchModelHubMixin
14
  import torch
15
  from torch import nn
16
  from transformers import AutoModel, AutoTokenizer, AutoConfig
17
-
18
 
19
 
20
  session = requests.Session()
@@ -162,7 +163,7 @@ def call_perspective_api(texts_df, column_name):#, s):
162
  yield plt.gcf(), pd.DataFrame.from_dict({column_name: texts, **req_att_scores})
163
 
164
 
165
- # @spaces.GPU
166
  def run_quality_check(dataset, column, batch_size, num_examples):
167
  info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
168
  if "error" in info_resp:
@@ -246,15 +247,23 @@ with gr.Blocks() as demo:
246
  gr.Markdown("### High")
247
  df_high = gr.DataFrame()
248
 
249
- # non_ascii_hist = gr.DataFrame(visible=False)
250
  non_ascii_hist = gr.Plot()
251
  texts_sample_df = gr.DataFrame(visible=False)
252
- gr_check_btn.click(run_quality_check, inputs=[dataset_name, text_column, batch_size, num_examples], outputs=[progress_bar, plot, df_low, df_medium, df_high, non_ascii_hist, texts_sample_df])
 
 
 
 
253
 
254
  gr_toxicity_btn = gr.Button("Run perpspective API to check toxicity of random samples.")
 
255
  toxicity_hist = gr.Plot()
256
  with gr.Accordion("Explore examples with toxicity scores:", open=False):
257
  toxicity_df = gr.DataFrame()
258
- gr_toxicity_btn.click(call_perspective_api, inputs=[texts_sample_df, text_column], outputs=[toxicity_hist, toxicity_df])
 
 
 
 
259
 
260
  demo.launch()
 
3
  from requests.adapters import HTTPAdapter, Retry
4
  import multiprocessing
5
  import os
6
+ import time
7
 
8
  import gradio as gr
9
  import pandas as pd
 
15
  import torch
16
  from torch import nn
17
  from transformers import AutoModel, AutoTokenizer, AutoConfig
18
+ from tqdm import tqdm
19
 
20
 
21
  session = requests.Session()
 
163
  yield plt.gcf(), pd.DataFrame.from_dict({column_name: texts, **req_att_scores})
164
 
165
 
166
+ @spaces.GPU
167
  def run_quality_check(dataset, column, batch_size, num_examples):
168
  info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
169
  if "error" in info_resp:
 
247
  gr.Markdown("### High")
248
  df_high = gr.DataFrame()
249
 
 
250
  non_ascii_hist = gr.Plot()
251
  texts_sample_df = gr.DataFrame(visible=False)
252
+ gr_check_btn.click(
253
+ run_quality_check,
254
+ inputs=[dataset_name, text_column, batch_size, num_examples],
255
+ outputs=[progress_bar, plot, df_low, df_medium, df_high, non_ascii_hist, texts_sample_df]
256
+ )
257
 
258
  gr_toxicity_btn = gr.Button("Run perpspective API to check toxicity of random samples.")
259
+ toxicity_progress_bar = gr.Label(show_label=False)
260
  toxicity_hist = gr.Plot()
261
  with gr.Accordion("Explore examples with toxicity scores:", open=False):
262
  toxicity_df = gr.DataFrame()
263
+ gr_toxicity_btn.click(
264
+ call_perspective_api,
265
+ inputs=[texts_sample_df, text_column],
266
+ outputs=[toxicity_progress_bar, toxicity_hist, toxicity_df]
267
+ )
268
 
269
  demo.launch()