shauryaDugar commited on
Commit
b9461b7
1 Parent(s): ec721cb

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. .DS_Store +0 -0
  2. .gitignore +166 -0
  3. README.md +3 -9
  4. allpreds.py +133 -0
  5. gcode.py +54 -0
  6. interface.py +31 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ #gradio and related
156
+ flagged/
157
+ gradio_cached_examples/
158
+
159
+ *.csv
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ #.idea/
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Bias Detection
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.21.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Bias_Detection
3
+ app_file: interface.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.12.0
 
 
6
  ---
 
 
allpreds.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from gcode import predict_text_classification_single_label_sample
import pandas as pd
from dotenv import load_dotenv
import os
from quickchart import QuickChart

# Pull PROJECT_ID and the *_ENDPOINT_ID values from a local .env file into os.environ.
load_dotenv()

# Maps each UI-facing bias label to its internal bias-type name and the
# Vertex AI endpoint ID serving the corresponding classification model.
# NOTE(review): endpoint IDs come from the environment and may be None if unset.
list_of_biases_and_endpts = {"Gender Bias": {"bias_type": "gender_bias", "endpoint_id": os.environ.get("GENDER_ENDPOINT_ID")},
                             "Racial Bias": {"bias_type": "racial_bias", "endpoint_id": os.environ.get("RACIAL_ENDPOINT_ID")},
                             "Political Bias": {"bias_type": "political_bias", "endpoint_id": os.environ.get("POLITICAL_ENDPOINT_ID")},
                             "Hate Speech": {"bias_type": "hate_speech", "endpoint_id": os.environ.get("HATE_ENDPOINT_ID")}}

# Index of the "biased" score inside each prediction's 'confidences' list:
# 1 means the bias confidence is at index 1; 0 means it is at index 0.
order_in_confidence = {"gender_bias": 1, "racial_bias": 1, "political_bias": 0, "hate_speech": 0}
19
+
20
+
# Top-level driver: runs the model via predict() and builds every UI payload
# (highlighted tokens, score dict, gauge chart HTML) from the result.
def make_preds(content, bias_type):
    """Analyze *content* for the chosen bias type.

    Args:
        content: the user-supplied text to analyze.
        bias_type: UI label, a key of ``list_of_biases_and_endpts``.

    Returns:
        A 3-tuple consumed by the Gradio output components:
        (token/label pairs for HighlightedText, {bias_type: percentage}
        for the Label, and the gauge-chart HTML string).
    """
    bias = list_of_biases_and_endpts[bias_type]
    df, bias_percentage = predict(content, bias["bias_type"], bias["endpoint_id"])

    # Build (token, label) pairs for the HighlightedText component;
    # chunks scoring above 0.5 are tagged with the bias type, others with None.
    pos_tokens = []
    for ind in df.index:
        label = bias_type if df['predictions'][ind] > 0.5 else None
        pos_tokens.append((df['content'][ind], label))
        pos_tokens.append((" ", None))

    bias_scores = {bias_type: bias_percentage}

    # Radial gauge chart rendered through the QuickChart service.
    html = generateChart(bias_percentage)

    return pos_tokens, bias_scores, html
37
+
38
+
39
+
# the predict function actually generates the predictions for the user content.
def predict(content, bias_type, endpoint_id):
    """Score *content* chunk-by-chunk against a Vertex AI endpoint.

    The text is split into 20-word chunks; each chunk is classified and the
    bias confidence (located via ``order_in_confidence``) recorded.

    Args:
        content: full article text.
        bias_type: internal bias name (e.g. "gender_bias").
        endpoint_id: Vertex AI endpoint ID for that bias model.

    Returns:
        (df, bias_percentage) where df has 'content'/'predictions' columns
        (one row per prediction) and bias_percentage is the fraction of
        chunks whose bias confidence exceeds 0.5, rounded to 2 places.

    Side effects:
        Writes the dataframe to ``preds_<bias_type>.csv``.
    """
    chunks = split_into_20_word_chunks(content)
    possibly_biased = []
    df = pd.DataFrame(columns=['content', 'predictions'])
    # For each chunk, request a prediction and append a row to the dataframe.
    # (Loop variable renamed from `content` to avoid shadowing the parameter.)
    for chunk in chunks:
        predictions = predict_text_classification_single_label_sample(
            project=os.environ.get("PROJECT_ID"),
            endpoint_id=endpoint_id,
            location="us-central1",
            content=chunk
        )
        for prediction in predictions:
            # Extract the "biased" confidence once; reuse it for both the
            # dataframe row and the >0.5 threshold check.
            res = float(dict(prediction)['confidences'][order_in_confidence[bias_type]])
            df.loc[len(df)] = {'content': chunk, 'predictions': res}
            if res > 0.5:
                possibly_biased.append(chunk)

    # Persist raw predictions for offline inspection.
    df.to_csv(f'preds_{bias_type}.csv')
    # Guard against empty/whitespace-only input: zero chunks means 0% bias,
    # not a ZeroDivisionError.
    bias_percentage = round(len(possibly_biased) / len(chunks), 2) if chunks else 0.0
    return df, bias_percentage
68
+
# this function splits the content into fixed-size word chunks
def split_into_20_word_chunks(long_string, chunk_size=20):
    """Split *long_string* into chunks of at most *chunk_size* words.

    Args:
        long_string: text to split (whitespace-delimited words).
        chunk_size: maximum words per chunk (default 20, matching the
            historical behavior this function is named after).

    Returns:
        List of space-joined chunk strings; empty list for empty/blank input.
        The final chunk may hold fewer than *chunk_size* words.
    """
    words = long_string.split()
    return [' '.join(words[i:i + chunk_size])
            for i in range(0, len(words), chunk_size)]
80
+
# this function splits the content into sentences
def split_into_sentences(long_string):
    """Split text into sentences terminated by '.', '?' or '!'.

    Each terminator ends a sentence (kept attached to it); any trailing
    text without a terminator becomes a final sentence. Sentences are
    whitespace-stripped.
    """
    terminators = {'.', '?', '!'}
    sentences = []
    buf = []

    for ch in long_string:
        buf.append(ch)
        if ch in terminators:
            sentences.append(''.join(buf).strip())
            buf = []

    leftover = ''.join(buf)
    if leftover:
        sentences.append(leftover.strip())

    return sentences
97
+
98
+
# generateChart builds the circular bias-percentage gauge using the
# QuickChart service (chart rendered server-side, returned as an image URL).
def generateChart(bias_percentage):
    """Render *bias_percentage* (0..1) as a radial gauge.

    Returns an HTML string: an <img> pointing at the QuickChart-rendered
    chart, wrapped in a scrollable container div.
    """
    qc = QuickChart()
    qc.width = 500
    qc.height = 300
    qc.version = '2'

    # Gauge shows the percentage value (e.g. "50.0").
    gauge_value = str(round(bias_percentage*100, 0))

    # Config is a Chart.js config expressed as a string (QuickChart accepts
    # JS-style configs, which plain JSON cannot represent).
    qc.config = """{
    type: 'radialGauge',
    data: {
      datasets: [{
        data: [""" + gauge_value + """],
        backgroundColor: getGradientFillHelper('horizontal', ['red', 'blue']),
      }]
    },
    options: {
      // See https://github.com/pandameister/chartjs-chart-radial-gauge#options
      domain: [0, 100],
      trackColor: '#f0f8ff',
      centerPercentage: 90,
      centerArea: {
        text: (val) => val + '%',
      },
    }
  }"""

    url = qc.get_url()
    img = f"""<img src="{url}"/>"""
    return (
        "<div style='max-width:100%; max-height:360px; overflow:auto'>"
        + img
        + "</div>")
gcode.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # [START aiplatform_predict_text_classification_single_label_sample]
16
+ from google.cloud import aiplatform
17
+ from google.cloud.aiplatform.gapic.schema import predict
18
+ from google.protobuf import json_format
19
+ from google.protobuf.struct_pb2 import Value
20
+
21
+
22
def predict_text_classification_single_label_sample(
    project: str,
    endpoint_id: str,
    content: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Send *content* to a deployed Vertex AI text-classification endpoint.

    Args:
        project: Google Cloud project ID.
        endpoint_id: numeric ID of the deployed Vertex AI endpoint.
        content: text to classify.
        location: endpoint region.
        api_endpoint: regional API host (must match *location*).

    Returns:
        The response's ``predictions`` sequence (protobuf Value objects;
        callers convert each entry with ``dict(...)``).
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    # Wrap the text in the schema type the endpoint expects, then serialize
    # it to a protobuf Value for the generic predict() API.
    instance = predict.instance.TextClassificationPredictionInstance(
        content=content,
    ).to_value()
    instances = [instance]
    # No model parameters needed for this prediction type; send an empty struct.
    parameters_dict = {}
    parameters = json_format.ParseDict(parameters_dict, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    # Debug logging of the raw response, kept from the original Google sample.
    print("response")
    print(" deployed_model_id:", response.deployed_model_id)

    predictions = response.predictions
    for prediction in predictions:
        print(" prediction:", dict(prediction))
    return predictions
53
+
54
+ # [END aiplatform_predict_text_classification_single_label_sample]
interface.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from allpreds import make_preds

# Example inputs shown beneath the interface. Each inner list is one example,
# with one element per input component in order: [content text, bias type].
# Here there is a single example, so len(examples) == 1.
examples = [['''Women are often considered to be more emotional and sensitive than men, which can make them less suitable for high-stakes decision-making roles in fields such as finance and politics. Men, on the other hand, are seen as logical and rational, making them better equipped to handle the pressures of leadership.''', "Gender Bias"]]

# A Gradio Interface wires together a function, its inputs and its outputs:
# the input component values are passed to `make_preds` as positional
# arguments, and its three return values fill the output components in order.
demo = gr.Interface(
    fn=make_preds,
    # Two inputs: the content (free-text area) and the bias type (radio button).
    inputs=[gr.TextArea(label="Content",
                        show_label=True,
                        placeholder="Enter some text to detect bias....."),
            gr.Radio(choices=["Gender Bias", "Racial Bias", "Political Bias", "Hate Speech"],
                     label="Bias Type")],

    # Three outputs: the highlighted-text bias analysis, the bias percentage
    # label, and the HTML gauge chart produced by allpreds.generateChart.
    outputs=[gr.HighlightedText(combine_adjacent=True, label="Bias Analysis", show_label=True),
             gr.Label(label="Percentage", show_label=True),
             gr.HTML(label="Bias Score", show_label=True)],

    examples=examples,
    title="Bias Analyzer",
    description="This app lets you detect and analyze different types of bias in written content. Right now we have support for four bias categories: gender, racial, political, and hate-speech. More categories will be added soon!"
)

# share=True additionally exposes a temporary public gradio.live URL.
demo.launch(share=True)