Spaces:
Runtime error
Runtime error
import gradio as gr
import pandas as pd

from widgets.widget_base import Widget
from data_measurements.dataset_statistics import DatasetStatisticsCacheClass as dmt_cls
import utils

# Module-level logging handle, set up once at import time.
# NOTE(review): presumably a logger configured for this file -- confirm
# against utils.prepare_logging.
logs = utils.prepare_logging(__file__)
class GeneralStats(Widget):
    """Widget behind the "General Text Statistics" tab.

    Holds four output components -- a summary paragraph, a missing-value
    line, a duplicates line and a top-vocabulary table -- and fills them
    from the dataset-statistics object handed to :meth:`update`.
    """

    def __init__(self) -> None:
        """Create the output components without rendering them.

        Each component is constructed with ``render=False``; placement in
        the layout happens later, in :meth:`render`.
        """
        self.general_stats = gr.Markdown(render=False)
        self.general_stats_top_vocab = gr.DataFrame(render=False)
        self.general_stats_missing = gr.Markdown(render=False)
        self.general_stats_duplicates = gr.Markdown(render=False)

    def render(self) -> None:
        """Render the components inside a "General Text Statistics" tab.

        Display order: summary text, missing values, duplicates, then the
        vocabulary table.
        """
        # Plain literal: the label contains no placeholders, so no f-string.
        with gr.TabItem("General Text Statistics"):
            self.general_stats.render()
            self.general_stats_missing.render()
            self.general_stats_duplicates.render()
            self.general_stats_top_vocab.render()

    def update(self, dstats: dmt_cls) -> dict:
        """Compute the new value of every output component.

        Args:
            dstats: Statistics object; this method reads its
                ``total_words``, ``total_open_words``,
                ``sorted_top_vocab_df``, ``text_nan_count`` and
                ``dups_frac`` attributes.

        Returns:
            A dict mapping each component of this widget to the value it
            should display.
        """
        # Assembled from explicit newline-terminated pieces so the text does
        # not depend on how this method is indented in the source file.
        general_stats_text = (
            "\n"
            "Use this widget to check whether the terms you see most "
            "represented in the dataset make sense for the goals of the "
            "dataset.\n"
            f"There are {dstats.total_words} total words.\n"
            f"There are {dstats.total_open_words} after removing closed "
            "class words.\n"
            "The most common [open class words]"
            "(https://dictionary.apa.org/open-class-words) and their "
            "counts are:\n"
        )

        # Copy into a fresh DataFrame and round to 4 decimals for display.
        top_vocab = pd.DataFrame(dstats.sorted_top_vocab_df).round(4)

        missing_text = (
            f"There are {dstats.text_nan_count} missing values in the dataset"
        )

        # dups_frac is a fraction; it is scaled by 100 to show a percentage.
        if dstats.dups_frac > 0:
            dupes_text = (
                f"The dataset is {round(dstats.dups_frac * 100, 2)}% "
                "duplicates, For more information about the duplicates, "
                "click the 'Duplicates' tab."
            )
        else:
            dupes_text = "There are 0 duplicate items in the dataset"

        return {
            self.general_stats: general_stats_text,
            self.general_stats_top_vocab: top_vocab,
            self.general_stats_missing: missing_text,
            self.general_stats_duplicates: dupes_text,
        }

    def output_components(self) -> list:
        """Return the components whose values :meth:`update` produces."""
        return [
            self.general_stats,
            self.general_stats_top_vocab,
            self.general_stats_missing,
            self.general_stats_duplicates,
        ]

    def add_events(self, state: gr.State) -> None:
        """Register event handlers; this widget has none, so do nothing."""
        pass