Zekun Wu committed on
Commit
3d31bd3
1 Parent(s): 5580be6
Files changed (2) hide show
  1. bias_detector/bias_detector.py +2 -2
  2. test.ipynb +256 -0
bias_detector/bias_detector.py CHANGED
@@ -141,10 +141,10 @@ class Detector:
141
  for item in prediction:
142
  result[item['word']] = {item['entity_group']: item['score']}
143
  elif self.classifier == 'Sentence' and self.model_type == 'All':
144
- result = {self.MD_SL_label_mapping.get(item['label'].split('__')[-1], 'unknown'): item['score'] for item in
145
  prediction}
146
  else:
147
- result = {self.SD_SL_label_mapping.get(item['label'].split('__')[-1], 'unknown'): item['score'] for item in
148
  prediction}
149
  results.append({text: result})
150
 
 
141
  for item in prediction:
142
  result[item['word']] = {item['entity_group']: item['score']}
143
  elif self.classifier == 'Sentence' and self.model_type == 'All':
144
+ result = {item['label'].split('__')[-1]: item['score'] for item in
145
  prediction}
146
  else:
147
+ result = {item['label'].split('__')[-1]: item['score'] for item in
148
  prediction}
149
  results.append({text: result})
150
 
test.ipynb ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2023-07-03T11:40:02.257868Z",
10
+ "start_time": "2023-07-03T11:39:50.130235Z"
11
+ }
12
+ },
13
+ "outputs": [
14
+ {
15
+ "name": "stdout",
16
+ "output_type": "stream",
17
+ "text": [
18
+ "Collecting transformers\r\n",
19
+ " Using cached transformers-4.30.2-py3-none-any.whl (7.2 MB)\r\n",
20
+ "Collecting torch\r\n",
21
+ " Using cached torch-2.0.1-cp39-none-macosx_11_0_arm64.whl (55.8 MB)\r\n",
22
+ "Requirement already satisfied: pyyaml>=5.1 in ./venv/lib/python3.9/site-packages (from transformers) (6.0)\r\n",
23
+ "Requirement already satisfied: requests in ./venv/lib/python3.9/site-packages (from transformers) (2.31.0)\r\n",
24
+ "Collecting huggingface-hub<1.0,>=0.14.1\r\n",
25
+ " Using cached huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\r\n",
26
+ "Collecting regex!=2019.12.17\r\n",
27
+ " Using cached regex-2023.6.3-cp39-cp39-macosx_11_0_arm64.whl (288 kB)\r\n",
28
+ "Collecting safetensors>=0.3.1\r\n",
29
+ " Using cached safetensors-0.3.1-cp39-cp39-macosx_12_0_arm64.whl (401 kB)\r\n",
30
+ "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\r\n",
31
+ " Using cached tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl (3.9 MB)\r\n",
32
+ "Requirement already satisfied: numpy>=1.17 in ./venv/lib/python3.9/site-packages (from transformers) (1.25.0)\r\n",
33
+ "Requirement already satisfied: packaging>=20.0 in ./venv/lib/python3.9/site-packages (from transformers) (23.1)\r\n",
34
+ "Collecting filelock\r\n",
35
+ " Using cached filelock-3.12.2-py3-none-any.whl (10 kB)\r\n",
36
+ "Collecting tqdm>=4.27\r\n",
37
+ " Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)\r\n",
38
+ "Collecting networkx\r\n",
39
+ " Using cached networkx-3.1-py3-none-any.whl (2.1 MB)\r\n",
40
+ "Requirement already satisfied: jinja2 in ./venv/lib/python3.9/site-packages (from torch) (3.1.2)\r\n",
41
+ "Collecting sympy\r\n",
42
+ " Using cached sympy-1.12-py3-none-any.whl (5.7 MB)\r\n",
43
+ "Requirement already satisfied: typing-extensions in ./venv/lib/python3.9/site-packages (from torch) (4.7.0)\r\n",
44
+ "Collecting fsspec\r\n",
45
+ " Using cached fsspec-2023.6.0-py3-none-any.whl (163 kB)\r\n",
46
+ "Requirement already satisfied: MarkupSafe>=2.0 in ./venv/lib/python3.9/site-packages (from jinja2->torch) (2.1.3)\r\n",
47
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in ./venv/lib/python3.9/site-packages (from requests->transformers) (2.0.3)\r\n",
48
+ "Requirement already satisfied: idna<4,>=2.5 in ./venv/lib/python3.9/site-packages (from requests->transformers) (3.4)\r\n",
49
+ "Requirement already satisfied: certifi>=2017.4.17 in ./venv/lib/python3.9/site-packages (from requests->transformers) (2023.5.7)\r\n",
50
+ "Requirement already satisfied: charset-normalizer<4,>=2 in ./venv/lib/python3.9/site-packages (from requests->transformers) (3.1.0)\r\n",
51
+ "Collecting mpmath>=0.19\r\n",
52
+ " Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\r\n",
53
+ "Installing collected packages: tokenizers, safetensors, mpmath, tqdm, sympy, regex, networkx, fsspec, filelock, torch, huggingface-hub, transformers\r\n",
54
+ "Successfully installed filelock-3.12.2 fsspec-2023.6.0 huggingface-hub-0.15.1 mpmath-1.3.0 networkx-3.1 regex-2023.6.3 safetensors-0.3.1 sympy-1.12 tokenizers-0.13.3 torch-2.0.1 tqdm-4.65.0 transformers-4.30.2\r\n",
55
+ "\r\n",
56
+ "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.1.2\u001B[0m\r\n",
57
+ "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n",
58
+ "Note: you may need to restart the kernel to use updated packages.\n"
59
+ ]
60
+ }
61
+ ],
62
+ "source": [
63
+ "pip install transformers torch"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 2,
69
+ "outputs": [
70
+ {
71
+ "name": "stderr",
72
+ "output_type": "stream",
73
+ "text": [
74
+ "/Users/zekunwu/Desktop/Multidimensional_Multilevel_Bias_Detection/venv/lib/python3.9/site-packages/urllib3/__init__.py:34: NotOpenSSLWarning: urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020\n",
75
+ " warnings.warn(\n"
76
+ ]
77
+ }
78
+ ],
79
+ "source": [
80
+ "from transformers import pipeline"
81
+ ],
82
+ "metadata": {
83
+ "collapsed": false,
84
+ "ExecuteTime": {
85
+ "end_time": "2023-07-03T11:40:15.740384Z",
86
+ "start_time": "2023-07-03T11:40:13.337662Z"
87
+ }
88
+ }
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 3,
93
+ "outputs": [
94
+ {
95
+ "data": {
96
+ "text/plain": "Downloading (…)lve/main/config.json: 0%| | 0.00/1.22k [00:00<?, ?B/s]",
97
+ "application/vnd.jupyter.widget-view+json": {
98
+ "version_major": 2,
99
+ "version_minor": 0,
100
+ "model_id": "fe49340d8bde452783356e63521406f8"
101
+ }
102
+ },
103
+ "metadata": {},
104
+ "output_type": "display_data"
105
+ },
106
+ {
107
+ "data": {
108
+ "text/plain": "Downloading pytorch_model.bin: 0%| | 0.00/268M [00:00<?, ?B/s]",
109
+ "application/vnd.jupyter.widget-view+json": {
110
+ "version_major": 2,
111
+ "version_minor": 0,
112
+ "model_id": "5bde37e9ea7847aabe552d785ad92259"
113
+ }
114
+ },
115
+ "metadata": {},
116
+ "output_type": "display_data"
117
+ },
118
+ {
119
+ "data": {
120
+ "text/plain": "Downloading (…)okenizer_config.json: 0%| | 0.00/320 [00:00<?, ?B/s]",
121
+ "application/vnd.jupyter.widget-view+json": {
122
+ "version_major": 2,
123
+ "version_minor": 0,
124
+ "model_id": "d37b8a75cfbe4d45a432e4f60f467f9e"
125
+ }
126
+ },
127
+ "metadata": {},
128
+ "output_type": "display_data"
129
+ },
130
+ {
131
+ "data": {
132
+ "text/plain": "Downloading (…)solve/main/vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]",
133
+ "application/vnd.jupyter.widget-view+json": {
134
+ "version_major": 2,
135
+ "version_minor": 0,
136
+ "model_id": "849bb76de24745edb6dc2525abddb654"
137
+ }
138
+ },
139
+ "metadata": {},
140
+ "output_type": "display_data"
141
+ },
142
+ {
143
+ "data": {
144
+ "text/plain": "Downloading (…)/main/tokenizer.json: 0%| | 0.00/712k [00:00<?, ?B/s]",
145
+ "application/vnd.jupyter.widget-view+json": {
146
+ "version_major": 2,
147
+ "version_minor": 0,
148
+ "model_id": "7715e71cc3ee417880da6791d3fb5c59"
149
+ }
150
+ },
151
+ "metadata": {},
152
+ "output_type": "display_data"
153
+ },
154
+ {
155
+ "data": {
156
+ "text/plain": "Downloading (…)cial_tokens_map.json: 0%| | 0.00/125 [00:00<?, ?B/s]",
157
+ "application/vnd.jupyter.widget-view+json": {
158
+ "version_major": 2,
159
+ "version_minor": 0,
160
+ "model_id": "4ee0092408bc48cd8cbe84b7e173ff2b"
161
+ }
162
+ },
163
+ "metadata": {},
164
+ "output_type": "display_data"
165
+ },
166
+ {
167
+ "name": "stderr",
168
+ "output_type": "stream",
169
+ "text": [
170
+ "Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers\n",
171
+ "pip install xformers.\n"
172
+ ]
173
+ }
174
+ ],
175
+ "source": [
176
+ "testpipe = pipeline(\"text-classification\",\"wu981526092/Sentence-Level-Multidimensional-Bias-Detector\")"
177
+ ],
178
+ "metadata": {
179
+ "collapsed": false,
180
+ "ExecuteTime": {
181
+ "end_time": "2023-07-03T11:40:46.412969Z",
182
+ "start_time": "2023-07-03T11:40:18.473347Z"
183
+ }
184
+ }
185
+ },
186
+ {
187
+ "cell_type": "code",
188
+ "execution_count": 4,
189
+ "outputs": [],
190
+ "source": [
191
+ "result = testpipe.predict([\"this is a test sentence\"])"
192
+ ],
193
+ "metadata": {
194
+ "collapsed": false,
195
+ "ExecuteTime": {
196
+ "end_time": "2023-07-03T11:40:50.912734Z",
197
+ "start_time": "2023-07-03T11:40:50.881524Z"
198
+ }
199
+ }
200
+ },
201
+ {
202
+ "cell_type": "code",
203
+ "execution_count": 5,
204
+ "outputs": [
205
+ {
206
+ "data": {
207
+ "text/plain": "[{'label': 'unrelated', 'score': 0.990786075592041}]"
208
+ },
209
+ "execution_count": 5,
210
+ "metadata": {},
211
+ "output_type": "execute_result"
212
+ }
213
+ ],
214
+ "source": [
215
+ "result"
216
+ ],
217
+ "metadata": {
218
+ "collapsed": false,
219
+ "ExecuteTime": {
220
+ "end_time": "2023-07-03T11:40:54.505395Z",
221
+ "start_time": "2023-07-03T11:40:54.498101Z"
222
+ }
223
+ }
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": null,
228
+ "outputs": [],
229
+ "source": [],
230
+ "metadata": {
231
+ "collapsed": false
232
+ }
233
+ }
234
+ ],
235
+ "metadata": {
236
+ "kernelspec": {
237
+ "display_name": "Python 3",
238
+ "language": "python",
239
+ "name": "python3"
240
+ },
241
+ "language_info": {
242
+ "codemirror_mode": {
243
+ "name": "ipython",
244
+ "version": 2
245
+ },
246
+ "file_extension": ".py",
247
+ "mimetype": "text/x-python",
248
+ "name": "python",
249
+ "nbconvert_exporter": "python",
250
+ "pygments_lexer": "ipython2",
251
+ "version": "2.7.6"
252
+ }
253
+ },
254
+ "nbformat": 4,
255
+ "nbformat_minor": 0
256
+ }