Spaces:

polinaeterna
/

text_quality_checker

Running on Zero

App Files Files Community

polinaeterna HF staff committed on Sep 9

Commit

73bc7cb

•

1 Parent(s): aa43a9d

fix

Browse files

Files changed (1) hide show

app.py +14 -5

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from collections import Counter
 from requests.adapters import HTTPAdapter, Retry
 import multiprocessing
 import os
 import gradio as gr
 import pandas as pd
@@ -14,7 +15,7 @@ from huggingface_hub import PyTorchModelHubMixin
 import torch
 from torch import nn
 from transformers import AutoModel, AutoTokenizer, AutoConfig
 session = requests.Session()
@@ -162,7 +163,7 @@ def call_perspective_api(texts_df, column_name):#, s):
     yield plt.gcf(), pd.DataFrame.from_dict({column_name: texts, **req_att_scores})
-# @spaces.GPU
 def run_quality_check(dataset, column, batch_size, num_examples):
     info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
     if "error" in info_resp:
@@ -246,15 +247,23 @@ with gr.Blocks() as demo:
         gr.Markdown("### High")
         df_high = gr.DataFrame()
-    # non_ascii_hist = gr.DataFrame(visible=False)
     non_ascii_hist = gr.Plot()
     texts_sample_df = gr.DataFrame(visible=False)
-    gr_check_btn.click(run_quality_check, inputs=[dataset_name, text_column, batch_size, num_examples], outputs=[progress_bar, plot, df_low, df_medium, df_high, non_ascii_hist, texts_sample_df])
     gr_toxicity_btn = gr.Button("Run perpspective API to check toxicity of random samples.")
     toxicity_hist = gr.Plot()
     with gr.Accordion("Explore examples with toxicity scores:", open=False):
         toxicity_df = gr.DataFrame()
-    gr_toxicity_btn.click(call_perspective_api, inputs=[texts_sample_df, text_column], outputs=[toxicity_hist, toxicity_df])
 demo.launch()

 from requests.adapters import HTTPAdapter, Retry
 import multiprocessing
 import os
+import time
 import gradio as gr
 import pandas as pd
 import torch
 from torch import nn
 from transformers import AutoModel, AutoTokenizer, AutoConfig
+from tqdm import tqdm
 session = requests.Session()
     yield plt.gcf(), pd.DataFrame.from_dict({column_name: texts, **req_att_scores})
+@spaces.GPU
 def run_quality_check(dataset, column, batch_size, num_examples):
     info_resp = session.get(f"https://datasets-server.huggingface.co/info?dataset={dataset}", timeout=3).json()
     if "error" in info_resp:
         gr.Markdown("### High")
         df_high = gr.DataFrame()
     non_ascii_hist = gr.Plot()
     texts_sample_df = gr.DataFrame(visible=False)
+    gr_check_btn.click(
+        run_quality_check,
+        inputs=[dataset_name, text_column, batch_size, num_examples],
+        outputs=[progress_bar, plot, df_low, df_medium, df_high, non_ascii_hist, texts_sample_df]
+    )
     gr_toxicity_btn = gr.Button("Run perpspective API to check toxicity of random samples.")
+    toxicity_progress_bar = gr.Label(show_label=False)
     toxicity_hist = gr.Plot()
     with gr.Accordion("Explore examples with toxicity scores:", open=False):
         toxicity_df = gr.DataFrame()
+    gr_toxicity_btn.click(
+        call_perspective_api,
+        inputs=[texts_sample_df, text_column],
+        outputs=[toxicity_progress_bar, toxicity_hist, toxicity_df]
+    )
 demo.launch()