freyam committed
Commit 8ab9329 · Parent: 7192c24

Add sample size limit and AVID report

app.py CHANGED
@@ -1,7 +1,9 @@
 import os
 import json
+import timeit
 import gradio as gr
 import pandas as pd
+from datetime import date
 
 from scripts.genbit import *
 from scripts.gender_profession_bias import *
@@ -10,8 +12,12 @@ from scripts.gender_distribution import *
 from datasets import load_dataset as hf_load_dataset
 from huggingface_hub import DatasetFilter, list_datasets
 
+from avidtools.datamodels.report import Report
+from avidtools.datamodels.components import *
+from avidtools.datamodels.enums import *
+
 MAX_THRESHOLD = 1000
-METHODOLOGIES = json.load(open("config/methodologies.json", "r"))
+METHODOLOGIES = json.load(open("config/methodologies.json", "r", encoding="utf8"))
 
 DATASET = {
     "name": None,
@@ -21,10 +27,69 @@ DATASET = {
     "sampling_size": None,
     "column": None,
     "methodology": None,
+    "result_df": None,
 }
 
 
+def generate_avid_report():
+    dataset_id = DATASET["name"]
+    methodology = DATASET["methodology"]
+    result_json = DATASET["result_df"].to_dict(orient="list")
+
+    report = Report()
+
+    report.affects = Affects(
+        developer=[],
+        deployer=["Hugging Face"] if DATASET["source"] == "HuggingFace Hub" else [],
+        artifacts=[Artifact(type=ArtifactTypeEnum.dataset, name=dataset_id)],
+    )
+    report.problemtype = Problemtype(
+        classof=ClassEnum.na,
+        type=TypeEnum.detection,
+        description=LangValue(
+            lang="eng", value="Dataset Bias Detection using BiasAware"
+        ),
+    )
+    report.metrics = [
+        Metric(
+            name=methodology,
+            detection_method=Detection(type=MethodEnum.test, name=methodology),
+            results=result_json,
+        )
+    ]
+    report.references = (
+        [
+            Reference(
+                label="""{dataset_id} on Hugging Face""".format(dataset_id=dataset_id),
+                url="""https://huggingface.co/{dataset_id}""".format(
+                    dataset_id=dataset_id
+                ),
+            )
+        ]
+        if DATASET["source"] == "HuggingFace Hub"
+        else []
+    )
+    report.description = LangValue(
+        lang="eng", value=METHODOLOGIES[methodology]["short_description"]
+    )
+    report.impact = Impact(
+        avid=AvidTaxonomy(
+            vuln_id="",
+            risk_domain=["Ethics"],
+            sep_view=[SepEnum.E0101],
+            lifecycle_view=[LifecycleEnum.L03],
+            taxonomy_version="0.2",
+        )
+    )
+    report.reported_date = date.today()
+
+    return gr.JSON(value=report.model_dump(), visible=True)
+
+
 def evaluate():
+    if DATASET["methodology"] == "GenBiT (Microsoft Gender Bias Tool)":
+        DATASET["sampling_size"] = min(DATASET["sampling_size"], 100)
+
     print(
         f"Dataset : {DATASET['name']}\n"
         f"Source : {DATASET['source']}\n"
@@ -32,9 +97,13 @@ def evaluate():
         f"Sampling Size : {DATASET['sampling_size']}\n"
         f"Column : {DATASET['column']}\n"
         f"Methodology : {DATASET['methodology']}\n"
+        f"Time Taken : ",
+        end="",
     )
 
     try:
+        start = timeit.default_timer()
+
         data = DATASET["df"].copy()
         data = data[[DATASET["column"]]]
 
@@ -45,27 +114,37 @@
         elif DATASET["sampling_method"] == "Random":
             data = data.sample(n=DATASET["sampling_size"], random_state=42)
 
-        result_df, result_plot, result_description = globals()[
+        result_df, result_plot = globals()[
             METHODOLOGIES.get(DATASET["methodology"]).get("fx")
         ](data)
 
+        DATASET["result_df"] = result_df
+
+        stop = timeit.default_timer()
+
+        print(f"{stop - start:.2f} seconds")
+
         return (
-            gr.Markdown(f"### Result Summary\n\nlorem ipsum", visible=True),
             gr.Plot(result_plot, visible=True),
             gr.Dataframe(result_df, visible=True),
+            gr.Button(visible=True, interactive=True),
+            gr.JSON(visible=True),
         )
     except Exception as e:
         print(e)
         return (
-            gr.Markdown(visible=False),
            gr.Plot(visible=False),
            gr.Dataframe(visible=False),
+            gr.Button(visible=False),
+            gr.JSON(visible=False),
        )
 
 
 def load_dataset(local_dataset, hf_dataset):
     DATASET["name"] = (
-        local_dataset.name.split("/")[-1].split(".")[0] if local_dataset else hf_dataset
+        os.path.splitext(os.path.basename(local_dataset.name))[0]
+        if local_dataset
+        else hf_dataset
     )
     DATASET["source"] = "Local Dataset" if local_dataset else "HuggingFace Hub"
     DATASET["df"] = (
@@ -227,7 +306,6 @@ with BiasAware:
     )
 
     hf_dataset = gr.Textbox(visible=False)
-    hf_dataset_search_results = gr.Radio(visible=False)
 
     with gr.Row():
         with gr.Column(scale=1):
@@ -261,16 +339,17 @@ with BiasAware:
         visible=True,
     )
 
-    result_description = gr.Markdown(visible=False)
-    result_plot = gr.Plot(show_label=False, container=False, visible=False)
+    result_plot = gr.Plot(show_label=False, container=False)
     result_df = gr.DataFrame(visible=False)
 
-    submit_to_avid_btn = gr.Button(
-        value="Submit to AVID",
+    generate_avid_report_btn = gr.Button(
+        value="Generate AVID Report",
         interactive=False,
         variant="primary",
     )
 
+    avid_report = gr.JSON(label="AVID Report", visible=False)
+
     #
     # Event Handlers
     #
@@ -322,12 +401,12 @@ with BiasAware:
     hf_dataset.submit(
         fn=show_hf_dataset_search_results,
         inputs=[hf_dataset],
-        outputs=[dataset_load_btn, hf_dataset_search_results],
+        outputs=[dataset_load_btn],
     )
 
     dataset_load_btn.click(
         fn=load_dataset,
-        inputs=[local_dataset, hf_dataset_search_results],
+        inputs=[local_dataset, hf_dataset],
         outputs=[
             dataset_sampling_method,
             dataset_sampling_size,
@@ -365,14 +444,15 @@ with BiasAware:
     )
 
     evaluation_btn.click(
-        fn=evaluate, inputs=None, outputs=[result_description, result_plot, result_df]
+        fn=evaluate,
+        inputs=None,
+        outputs=[result_plot, result_df, generate_avid_report_btn, avid_report],
     )
 
-    submit_to_avid_btn.click(
-        fn=None,
-        inputs=None,
-        outputs=None,
+    generate_avid_report_btn.click(
+        fn=generate_avid_report, inputs=None, outputs=[avid_report]
    )
 
 
-BiasAware.launch()
+if __name__ == "__main__":
+    BiasAware.launch()
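For context, here is a minimal, self-contained sketch of the report path this commit adds, exercised outside the Gradio UI. It mirrors the avidtools calls in `generate_avid_report()` above; the toy DataFrame, the `"imdb"` dataset id, and the decision to skip the optional reference/description/impact fields are illustrative assumptions, not part of the commit.

```python
# Illustrative only (not part of the commit): build an AVID report for a toy
# result, mirroring the calls used by generate_avid_report() above.
from datetime import date

import pandas as pd

from avidtools.datamodels.report import Report
from avidtools.datamodels.components import *
from avidtools.datamodels.enums import *

# Made-up evaluation output standing in for DATASET["result_df"].
toy_result = pd.DataFrame({"Metric": ["Male Positive Gender"], "Value": [42]})

report = Report()
report.affects = Affects(
    developer=[],
    deployer=["Hugging Face"],
    artifacts=[Artifact(type=ArtifactTypeEnum.dataset, name="imdb")],  # hypothetical dataset id
)
report.problemtype = Problemtype(
    classof=ClassEnum.na,
    type=TypeEnum.detection,
    description=LangValue(lang="eng", value="Dataset Bias Detection using BiasAware"),
)
report.metrics = [
    Metric(
        name="Gender Distribution (Term Identity Diversity)",
        detection_method=Detection(
            type=MethodEnum.test, name="Gender Distribution (Term Identity Diversity)"
        ),
        results=toy_result.to_dict(orient="list"),
    )
]
report.reported_date = date.today()

# model_dump() is exactly what the app hands to the gr.JSON component.
print(report.model_dump())
```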
config/methodologies.json CHANGED
@@ -1,14 +1,17 @@
 {
   "Gender Distribution (Term Identity Diversity)": {
-    "description": "Gender distribution is an essential aspect of identity diversity, representing the presence and balance of different gender identities within a given population or dataset. Understanding gender distribution is crucial for promoting inclusivity and equity in various contexts, such as workplaces, educational institutions, and social settings.\nIn this analysis, we examine gender distribution using a set of predefined gender identity categories. Each category represents a specific gender-related attribute or expression. Let's define the terms used in the analysis:\n- No Gender: This category likely refers to individuals who identify as non-binary, genderqueer, or gender-neutral, indicating that they do not align with traditional binary gender categories (male or female).\n- Equal Gender: This category may represent a balance between male and female genders, suggesting an equal representation of both in the dataset or population.\n- Female Positive Gender: This category likely includes individuals who identify as female or have a strong affiliation with femininity.\n- Male Positive Gender: Similarly, this category includes individuals who identify as male or have a strong affiliation with masculinity.\n- Female Strongly Positive Gender: This subcategory represents a more emphatic identification with female gender attributes, possibly indicating a stronger female gender identity.\n- Male Strongly Positive Gender: This subcategory mirrors the previous one but for male gender attributes, indicating a stronger male gender identity.",
+    "description": "Gender distribution is a fundamental component of identity diversity, serving as a critical indicator of the presence and equilibrium of various gender identities within a given population or dataset. An understanding of gender distribution holds immense significance for fostering inclusivity and equity across diverse contexts, including workplaces, educational institutions, and social environments.\n\nIn this analysis, we employ a structured approach to examine gender distribution. We categorize gender identities into predefined groups, each representing specific gender-related attributes or expressions. These categories help us comprehensively assess the gender composition within the dataset or population under scrutiny. Here's a breakdown of the terms used in the analysis:\n- No Gender: This category encompasses text that lacks significant gender-specific terms or maintains a balance between male and female terms, resulting in a neutral or 'no gender' classification.\n- Equal Gender: The 'Equal Gender' category signifies a balance between male and female terms in the analyzed text, indicating an equitable representation of both genders.\n- Female Positive Gender: Within this category, we include text that exhibits a higher prevalence of female-related terms.\n- Male Positive Gender: Similarly, the 'Male Positive Gender' category comprises text with a higher occurrence of male-related terms.\n- Female Strongly Positive Gender: This subcategory represents text with a significantly stronger presence of female-related terms, exceeding a 75% threshold for strong gender association.\n- Male Strongly Positive Gender: Analogous to the previous subcategory, 'Male Strongly Positive Gender' represents text with a significantly stronger presence of male-related terms, exceeding a 75% threshold for strong gender association.\n\nPlease note that the following categories are based on the analysis of text content and do not necessarily indicate the gender identities of individuals described within the text.",
+    "short_description": "This methodology uncovers gender distribution and its impact on inclusivity and equity across diverse contexts.",
     "fx": "eval_gender_distribution"
   },
   "Gender Profession Bias (Lexical Evaluation)": {
-    "description": "This approach to addressing gender bias in language places a strong emphasis on a fundamental shift in detection and mitigation strategies.\n- Instead of solely relying on traditional frequency-based methods, this approach adopts a more nuanced perspective, prioritizing features within the text that consider contextual and semantic cues. It recognizes that gender bias extends beyond mere word frequency and delves into how language is structured and how it reinforces gender stereotypes.\n- Even with advanced models like Word Embedding and Contextual Word Embedding, which capture more complex language features, there's still a risk of inheriting biases from training data.\n- To tackle this, this approach advocates for a data-driven strategy, involving the collection and labeling of datasets encompassing various subtypes of bias, using a comprehensive taxonomy for precise categorization.",
+    "description": "Gender-profession bias occurs when certain gender identities are overrepresented or underrepresented in the training data, which can result in biased model outputs and reinforce stereotypes. Recognizing and addressing this bias is crucial for promoting fairness and equity in AI applications. Understanding the gender-profession distribution within these datasets is pivotal for creating more inclusive and accurate models, as these models have wide-ranging applications, from chatbots and automated content generation to language translation, and their outputs can have a profound impact on society. Addressing gender-profession bias is an essential step in fostering diversity, inclusivity, and fairness in AI technologies.\n\nThis methodology is designed to identify gender and profession-related information within text-based datasets. It specifically focuses on detecting instances where male and female pronouns are associated with professions in the text. This is achieved through the meticulous use of tailored lexicons and robust regular expressions, which are applied systematically to examine each sentence within the dataset while preserving the contextual information of these linguistic elements.\n\nBy implementing this method, we aim to promote the ethical and socially responsible use of LM-powered applications. It provides valuable insights into gender-profession associations present in unmodified textual data, contributing to a more equitable and informed use of language models.\n\nIn the ever-evolving landscape of technology and language models, this research offers a practical solution to unveil gender and profession dynamics within text data. Its application can bolster the inclusivity and ethical considerations of LM-powered applications, ensuring not only technical proficiency but also a deeper comprehension of the language and its societal implications within textual datasets.",
+    "short_description": "This methodology uncovers gender-profession bias in training data to ensure fairness and inclusivity in AI applications by systematically identifying gender-profession associations within text-based datasets.",
     "fx": "eval_gender_profession"
   },
   "GenBiT (Microsoft Gender Bias Tool)": {
-    "description": "[GenBiT](https://www.microsoft.com/en-us/research/uploads/prod/2021/10/MSJAR_Genbit_Final_Version-616fd3a073758.pdf) is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications.",
+    "description": "(Note: The sampling size is limited to 100 for this methodology due to computational constraints.)\n\n[GenBiT](https://www.microsoft.com/en-us/research/uploads/prod/2021/10/MSJAR_Genbit_Final_Version-616fd3a073758.pdf) is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications.",
+    "short_description": "This methodology highlights GenBiT's function in mitigating bias in language datasets by offering adaptability to various forms of bias, such as gender, race, religion, and other dimensions.",
     "fx": "eval_genbit"
   }
 }
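Each entry now carries both the long `description` rendered in the UI and a `short_description` that `generate_avid_report()` copies into the AVID report, alongside the existing `fx` hook. As a quick illustration of how `app.py` consumes this file, the sketch below re-implements the dispatch step from `evaluate()`; it assumes it is run from the repo root, and the stand-in evaluation function and two-row DataFrame are made up for the example.

```python
# Illustrative sketch of the config-driven dispatch used in evaluate(): the "fx"
# string is resolved against the module's globals() and called on the selected
# column. The evaluation function here is a trivial stand-in for the real
# scripts.gender_distribution implementation.
import json

import pandas as pd

METHODOLOGIES = json.load(open("config/methodologies.json", "r", encoding="utf8"))


def eval_gender_distribution(data):
    # Stand-in: the real function returns a metrics DataFrame and a plot figure.
    return pd.DataFrame({"Metric": ["No Gender"], "Value": [len(data)]}), None


data = pd.DataFrame({"text": ["He is a doctor.", "She is a pilot."]})
methodology = "Gender Distribution (Term Identity Diversity)"

result_df, result_plot = globals()[METHODOLOGIES[methodology]["fx"]](data)
print(METHODOLOGIES[methodology]["short_description"])
print(result_df)
```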
requirements.txt CHANGED
@@ -5,4 +5,5 @@ pandas==2.0.3
 spacy==3.6.1
 genbit==2.2.0.0
 plotly==5.16.1
-datasets==2.14.5
+datasets==2.14.5
+avidtools==0.1.1.2
scripts/genbit.py CHANGED
@@ -72,6 +72,5 @@ def eval_genbit(data):
     )
 
     result_plot = plot_genbit(result_json)
-    result_conclusion = ""
 
-    return result_df, result_plot, result_conclusion
+    return result_df, result_plot
scripts/gender_distribution.py CHANGED
@@ -88,15 +88,11 @@ def eval_gender_distribution(data):
         "Female Strongly Positive Gender",
     ]
 
-    gender_counts = (
+    result_json = (
         data["gender_category"].value_counts().reindex(gender_labels, fill_value=0)
     )
 
-    result_df = pd.DataFrame(
-        {"Metric": gender_counts.index, "Value": gender_counts.values}
-    )
-    result_plot = plot_gender_category_counts(gender_labels, gender_counts)
-
-    result_conclusion = ""
+    result_df = pd.DataFrame({"Metric": result_json.index, "Value": result_json.values})
+    result_plot = plot_gender_category_counts(gender_labels, result_json)
 
-    return result_df, result_plot, result_conclusion
+    return result_df, result_plot
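The `gender_labels` counted above are the categories defined in `config/methodologies.json`. The snippet below is a hedged guess at how a per-row `gender_category` label could be assigned before the `value_counts()` call; the actual lexicons and assignment logic live elsewhere in `scripts/gender_distribution.py` and are not part of this diff. Only the category names and the 75% cutoff are taken from the methodology description.

```python
# Hedged sketch (not from the diff): one plausible way a per-row "gender_category"
# label could be produced. The term lists are tiny illustrative samples and the
# 75% cutoff follows the wording in methodologies.json.
MALE_TERMS = {"he", "him", "his", "man", "men", "male"}
FEMALE_TERMS = {"she", "her", "hers", "woman", "women", "female"}


def categorize(text: str) -> str:
    tokens = text.lower().split()
    male = sum(token in MALE_TERMS for token in tokens)
    female = sum(token in FEMALE_TERMS for token in tokens)
    total = male + female
    if total == 0:
        return "No Gender"
    if male == female:
        return "Equal Gender"
    share = max(male, female) / total
    if female > male:
        return "Female Strongly Positive Gender" if share > 0.75 else "Female Positive Gender"
    return "Male Strongly Positive Gender" if share > 0.75 else "Male Positive Gender"


print(categorize("She said she would meet her brother."))  # Female Strongly Positive Gender
```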
scripts/gender_profession_bias.py CHANGED
@@ -142,6 +142,4 @@ def eval_gender_profession(data):
         .rename(columns={"index": "Metric", 0: "Value"})
     )
 
-    result_conclusion = ""
-
-    return result_df, result_plot, result_conclusion
+    return result_df, result_plot
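The methodology description speaks of pairing gendered pronouns with professions via lexicons and regular expressions, sentence by sentence. The following is an illustrative sketch of that idea, not the script's actual implementation; the tiny pronoun and profession lexicons are placeholders.

```python
# Hedged sketch (not the script's actual code): a lexicon-plus-regex pass that
# records which gendered pronouns co-occur with profession terms in each sentence.
import re

MALE_PRONOUNS = r"\b(he|him|his)\b"
FEMALE_PRONOUNS = r"\b(she|her|hers)\b"
PROFESSIONS = r"\b(doctor|nurse|engineer|teacher|pilot)\b"


def pronoun_profession_hits(text: str):
    hits = []
    # Split on sentence-ending punctuation so each association keeps its context.
    for sentence in re.split(r"(?<=[.!?])\s+", text):
        professions = re.findall(PROFESSIONS, sentence, flags=re.IGNORECASE)
        if not professions:
            continue
        if re.search(MALE_PRONOUNS, sentence, flags=re.IGNORECASE):
            hits.extend(("male", p.lower()) for p in professions)
        if re.search(FEMALE_PRONOUNS, sentence, flags=re.IGNORECASE):
            hits.extend(("female", p.lower()) for p in professions)
    return hits


print(pronoun_profession_hits("She is a doctor. He wants to be a nurse!"))
# [('female', 'doctor'), ('male', 'nurse')]
```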