shauryaDugar commited on
Commit
b9461b7
1 Parent(s): ec721cb

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. .DS_Store +0 -0
  2. .gitignore +166 -0
  3. README.md +3 -9
  4. allpreds.py +133 -0
  5. gcode.py +54 -0
  6. interface.py +31 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ #gradio and related
156
+ flagged/
157
+ gradio_cached_examples/
158
+
159
+ *.csv
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ #.idea/
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Bias Detection
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.21.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Bias_Detection
3
+ app_file: interface.py
 
 
4
  sdk: gradio
5
+ sdk_version: 4.12.0
 
 
6
  ---
 
 
allpreds.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from gcode import predict_text_classification_single_label_sample
import pandas as pd
from dotenv import load_dotenv
import os
from quickchart import QuickChart

# Pull PROJECT_ID and the *_ENDPOINT_ID values from a local .env file into os.environ.
load_dotenv()

# Maps each UI-facing bias label to its internal bias-type name and the
# Vertex AI endpoint ID serving the corresponding classification model.
# NOTE(review): endpoint IDs come from the environment and may be None if unset.
list_of_biases_and_endpts = {"Gender Bias": {"bias_type": "gender_bias", "endpoint_id": os.environ.get("GENDER_ENDPOINT_ID")},
                             "Racial Bias": {"bias_type": "racial_bias", "endpoint_id": os.environ.get("RACIAL_ENDPOINT_ID")},
                             "Political Bias": {"bias_type": "political_bias", "endpoint_id": os.environ.get("POLITICAL_ENDPOINT_ID")},
                             "Hate Speech": {"bias_type": "hate_speech", "endpoint_id": os.environ.get("HATE_ENDPOINT_ID")}}

# Index of the "biased" score inside each prediction's 'confidences' list:
# 1 means the bias confidence is at index 1; 0 means it is at index 0.
order_in_confidence = {"gender_bias": 1, "racial_bias": 1, "political_bias": 0, "hate_speech": 0}
19
+
20
+
# Top-level driver: runs the model via predict() and builds every UI payload
# (highlighted tokens, score dict, gauge chart HTML) from the result.
def make_preds(content, bias_type):
    """Analyze *content* for the chosen bias type.

    Args:
        content: the user-supplied text to analyze.
        bias_type: UI label, a key of ``list_of_biases_and_endpts``.

    Returns:
        A 3-tuple consumed by the Gradio output components:
        (token/label pairs for HighlightedText, {bias_type: percentage}
        for the Label, and the gauge-chart HTML string).
    """
    bias = list_of_biases_and_endpts[bias_type]
    df, bias_percentage = predict(content, bias["bias_type"], bias["endpoint_id"])

    # Build (token, label) pairs for the HighlightedText component;
    # chunks scoring above 0.5 are tagged with the bias type, others with None.
    pos_tokens = []
    for ind in df.index:
        label = bias_type if df['predictions'][ind] > 0.5 else None
        pos_tokens.append((df['content'][ind], label))
        pos_tokens.append((" ", None))

    bias_scores = {bias_type: bias_percentage}

    # Radial gauge chart rendered through the QuickChart service.
    html = generateChart(bias_percentage)

    return pos_tokens, bias_scores, html
37
+
38
+
39
+
# the predict function actually generates the predictions for the user content.
def predict(content, bias_type, endpoint_id):
    """Score *content* chunk-by-chunk against a Vertex AI endpoint.

    The text is split into 20-word chunks; each chunk is classified and the
    bias confidence (located via ``order_in_confidence``) recorded.

    Args:
        content: full article text.
        bias_type: internal bias name (e.g. "gender_bias").
        endpoint_id: Vertex AI endpoint ID for that bias model.

    Returns:
        (df, bias_percentage) where df has 'content'/'predictions' columns
        (one row per prediction) and bias_percentage is the fraction of
        chunks whose bias confidence exceeds 0.5, rounded to 2 places.

    Side effects:
        Writes the dataframe to ``preds_<bias_type>.csv``.
    """
    chunks = split_into_20_word_chunks(content)
    possibly_biased = []
    df = pd.DataFrame(columns=['content', 'predictions'])
    # For each chunk, request a prediction and append a row to the dataframe.
    # (Loop variable renamed from `content` to avoid shadowing the parameter.)
    for chunk in chunks:
        predictions = predict_text_classification_single_label_sample(
            project=os.environ.get("PROJECT_ID"),
            endpoint_id=endpoint_id,
            location="us-central1",
            content=chunk
        )
        for prediction in predictions:
            # Extract the "biased" confidence once; reuse it for both the
            # dataframe row and the >0.5 threshold check.
            res = float(dict(prediction)['confidences'][order_in_confidence[bias_type]])
            df.loc[len(df)] = {'content': chunk, 'predictions': res}
            if res > 0.5:
                possibly_biased.append(chunk)

    # Persist raw predictions for offline inspection.
    df.to_csv(f'preds_{bias_type}.csv')
    # Guard against empty/whitespace-only input: zero chunks means 0% bias,
    # not a ZeroDivisionError.
    bias_percentage = round(len(possibly_biased) / len(chunks), 2) if chunks else 0.0
    return df, bias_percentage
68
+
# this function splits the content into fixed-size word chunks
def split_into_20_word_chunks(long_string, chunk_size=20):
    """Split *long_string* into chunks of at most *chunk_size* words.

    Args:
        long_string: text to split (whitespace-delimited words).
        chunk_size: maximum words per chunk (default 20, matching the
            historical behavior this function is named after).

    Returns:
        List of space-joined chunk strings; empty list for empty/blank input.
        The final chunk may hold fewer than *chunk_size* words.
    """
    words = long_string.split()
    return [' '.join(words[i:i + chunk_size])
            for i in range(0, len(words), chunk_size)]
80
+
# this function splits the content into sentences
def split_into_sentences(long_string):
    """Split text into sentences terminated by '.', '?' or '!'.

    Each terminator ends a sentence (kept attached to it); any trailing
    text without a terminator becomes a final sentence. Sentences are
    whitespace-stripped.
    """
    terminators = {'.', '?', '!'}
    sentences = []
    buf = []

    for ch in long_string:
        buf.append(ch)
        if ch in terminators:
            sentences.append(''.join(buf).strip())
            buf = []

    leftover = ''.join(buf)
    if leftover:
        sentences.append(leftover.strip())

    return sentences
97
+
98
+
# generateChart builds the circular bias-percentage gauge using the
# QuickChart service (chart rendered server-side, returned as an image URL).
def generateChart(bias_percentage):
    """Render *bias_percentage* (0..1) as a radial gauge.

    Returns an HTML string: an <img> pointing at the QuickChart-rendered
    chart, wrapped in a scrollable container div.
    """
    qc = QuickChart()
    qc.width = 500
    qc.height = 300
    qc.version = '2'

    # Gauge shows the percentage value (e.g. "50.0").
    gauge_value = str(round(bias_percentage*100, 0))

    # Config is a Chart.js config expressed as a string (QuickChart accepts
    # JS-style configs, which plain JSON cannot represent).
    qc.config = """{
    type: 'radialGauge',
    data: {
      datasets: [{
        data: [""" + gauge_value + """],
        backgroundColor: getGradientFillHelper('horizontal', ['red', 'blue']),
      }]
    },
    options: {
      // See https://github.com/pandameister/chartjs-chart-radial-gauge#options
      domain: [0, 100],
      trackColor: '#f0f8ff',
      centerPercentage: 90,
      centerArea: {
        text: (val) => val + '%',
      },
    }
  }"""

    url = qc.get_url()
    img = f"""<img src="{url}"/>"""
    return (
        "<div style='max-width:100%; max-height:360px; overflow:auto'>"
        + img
        + "</div>")
gcode.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # [START aiplatform_predict_text_classification_single_label_sample]
16
+ from google.cloud import aiplatform
17
+ from google.cloud.aiplatform.gapic.schema import predict
18
+ from google.protobuf import json_format
19
+ from google.protobuf.struct_pb2 import Value
20
+
21
+
22
def predict_text_classification_single_label_sample(
    project: str,
    endpoint_id: str,
    content: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Send *content* to a deployed Vertex AI text-classification endpoint.

    Args:
        project: Google Cloud project ID.
        endpoint_id: numeric ID of the deployed Vertex AI endpoint.
        content: text to classify.
        location: endpoint region.
        api_endpoint: regional API host (must match *location*).

    Returns:
        The response's ``predictions`` sequence (protobuf Value objects;
        callers convert each entry with ``dict(...)``).
    """
    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    # Wrap the text in the schema type the endpoint expects, then serialize
    # it to a protobuf Value for the generic predict() API.
    instance = predict.instance.TextClassificationPredictionInstance(
        content=content,
    ).to_value()
    instances = [instance]
    # No model parameters needed for this prediction type; send an empty struct.
    parameters_dict = {}
    parameters = json_format.ParseDict(parameters_dict, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    # Debug logging of the raw response, kept from the original Google sample.
    print("response")
    print(" deployed_model_id:", response.deployed_model_id)

    predictions = response.predictions
    for prediction in predictions:
        print(" prediction:", dict(prediction))
    return predictions
53
+
54
+ # [END aiplatform_predict_text_classification_single_label_sample]
interface.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from allpreds import make_preds

# Example inputs shown beneath the interface. Each inner list is one example,
# with one element per input component in order: [content text, bias type].
# Here there is a single example, so len(examples) == 1.
examples = [['''Women are often considered to be more emotional and sensitive than men, which can make them less suitable for high-stakes decision-making roles in fields such as finance and politics. Men, on the other hand, are seen as logical and rational, making them better equipped to handle the pressures of leadership.''', "Gender Bias"]]

# A Gradio Interface wires together a function, its inputs and its outputs:
# the input component values are passed to `make_preds` as positional
# arguments, and its three return values fill the output components in order.
demo = gr.Interface(
    fn=make_preds,
    # Two inputs: the content (free-text area) and the bias type (radio button).
    inputs=[gr.TextArea(label="Content",
                        show_label=True,
                        placeholder="Enter some text to detect bias....."),
            gr.Radio(choices=["Gender Bias", "Racial Bias", "Political Bias", "Hate Speech"],
                     label="Bias Type")],

    # Three outputs: the highlighted-text bias analysis, the bias percentage
    # label, and the HTML gauge chart produced by allpreds.generateChart.
    outputs=[gr.HighlightedText(combine_adjacent=True, label="Bias Analysis", show_label=True),
             gr.Label(label="Percentage", show_label=True),
             gr.HTML(label="Bias Score", show_label=True)],

    examples=examples,
    title="Bias Analyzer",
    description="This app lets you detect and analyze different types of bias in written content. Right now we have support for four bias categories: gender, racial, political, and hate-speech. More categories will be added soon!"
)

# share=True additionally exposes a temporary public gradio.live URL.
demo.launch(share=True)