Zekun Wu committed on
Commit
40c82a6
1 Parent(s): 6cc48e7
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/Multidimensional_Multilevel_Bias_Detection.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="inheritedJdk" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Multidimensional_Multilevel_Bias_Detection)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/Multidimensional_Multilevel_Bias_Detection.iml" filepath="$PROJECT_DIR$/.idea/Multidimensional_Multilevel_Bias_Detection.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
README.md CHANGED
@@ -1,13 +1 @@
1
- ---
2
- title: Multidimensional Multilevel Bias Detection
3
- emoji: 🏆
4
- colorFrom: red
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.21.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # text-bias-classification
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from bias_detector import Detector
3
+
4
st.title("Multidimensional Multilevel Bias Detection")

level = st.selectbox("Select the Bias Levels:", ("Token", "Sentence"))

# Detector has no multidimensional ("All") model for the sentence level (its
# mapping entry is None), so only offer "All" when the token level is chosen.
if level == "Token":
    dimension_options = ("All", "Gender", "Religion", "Race", "Profession")
else:
    dimension_options = ("Gender", "Religion", "Race", "Profession")
dimension = st.selectbox("Select the Bias Dimensions:", dimension_options)

# Streamlit re-runs this script on every interaction, so the detector is
# rebuilt for the currently selected level/dimension each time.
detector = Detector(level, dimension)
target_sentence = st.text_input("Input the sentence you want to detect:")
10
+
11
def format_results(results):
    """Render detector results as a Markdown string.

    Args:
        results: A list of single-entry dictionaries mapping an input text to
            its predictions. Three prediction shapes are supported:
            ``{token: {label: score}}`` (token-level),
            ``{label: score}`` (sentence-level) and
            ``{'error': message}`` (a failed prediction).

    Returns:
        str: Markdown text, or an empty string when ``results`` is empty.
    """
    parts = []
    for result in results:
        for text, pred in result.items():
            parts.append(f"**Text**: {text}\n\n")
            parts.append("**Predictions**:\n")
            for key, value in pred.items():
                if isinstance(value, dict):
                    # Token-level entry: one token with its label/score pairs.
                    parts.append(f"- Token: `{key}`\n")
                    parts.extend(
                        f" - Label: `{label}`, Score: `{score}`\n"
                        for label, score in value.items()
                    )
                elif key == "error":
                    # Failed prediction: the value is the error message.
                    parts.append(f"- Error: `{value}`\n")
                else:
                    # Sentence-level entry: the key is the label itself and
                    # the value is its score (not a nested dictionary).
                    parts.append(f"- Label: `{key}`, Score: `{value}`\n")
    return "".join(parts)
22
+
23
if st.button("Detect"):
    if not target_sentence.strip():
        # Nothing to analyse; avoid sending an empty string to the model.
        st.warning("Please enter a sentence before running the detection.")
    else:
        try:
            results = detector.predict([target_sentence])
        except RuntimeError as err:
            # The detector reports API failures as RuntimeError; show the
            # message in the page instead of an unhandled traceback.
            st.error(f"Detection failed: {err}")
        else:
            formatted_results = format_results(results)
            st.markdown(f"## Detection Results: \n\n {formatted_results}")
bias_detector/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .bias_detector import Detector
bias_detector/bias_detector.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
+ from typing import List
4
+ import os
5
class Detector:
    """
    A class for detecting various forms of bias in text using pre-trained models.

    Each instance is bound to one hosted model, chosen by a bias level
    ("Token" or "Sentence") and a bias dimension ("All", "Race", "Gender",
    "Profession" or "Religion"). Texts are sent to that model's endpoint on
    the Hugging Face Inference API.
    """

    def __init__(self, classifier, model_type):
        """
        Initializes the detector with a specific model.

        The API token is read from the ``BIAS_DETECTOR_API_KEY`` environment
        variable. When it is not set, requests are sent without an
        ``Authorization`` header.

        Args:
            classifier (str): The bias level to use: "Token" or "Sentence".
            model_type (str): The bias dimension to use: "All", "Race",
                "Gender", "Profession" or "Religion".

        Raises:
            ValueError: If ``classifier`` or ``model_type`` is not a known key.
        """

        # Maps classifiers to their available models. A value of None means
        # that no model exists for that combination.
        self.classifier_model_mapping = {
            "Token": {
                "All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
                "Race": "wu981526092/Token-Level-Race-Bias-Detector",
                "Gender": "wu981526092/Token-Level-Gender-Bias-Detector",
                "Profession": "wu981526092/Token-Level-Profession-Bias-Detector",
                "Religion": "wu981526092/Token-Level-Religion-Bias-Detector",
            },
            "Sentence": {
                "All": None,
                "Religion": "wu981526092/Sentence-Level-Religion-Bias-Detector",
                "Profession": "wu981526092/Sentence-Level-Profession-Bias-Detector",
                "Race": "wu981526092/Sentence-Level-Race-Bias-Detector",
                "Gender": "wu981526092/Sentence-Level-Gender-Bias-Detector",
            }
        }

        # Label names for sentence-level models of a single dimension.
        self.SD_SL_label_mapping = {
            'LABEL_0': 'stereotype',
            'LABEL_1': 'anti-stereotype',
            'LABEL_2': 'unrelated'
        }

        # Label names for the sentence-level model covering all dimensions.
        self.MD_SL_label_mapping = {
            'LABEL_0': 'unrelated',
            'LABEL_1': 'stereotype_gender',
            'LABEL_2': 'anti-stereotype_gender',
            'LABEL_3': 'stereotype_race',
            'LABEL_4': 'anti-stereotype_race',
            'LABEL_5': 'stereotype_profession',
            'LABEL_6': 'anti-stereotype_profession',
            'LABEL_7': 'stereotype_religion',
            'LABEL_8': 'anti-stereotype_religion'
        }

        self.classifier = classifier
        self.model_type = model_type

        if classifier not in self.classifier_model_mapping:
            raise ValueError(f"Invalid classifier. Expected one of: {list(self.classifier_model_mapping.keys())}")

        if model_type not in self.classifier_model_mapping[classifier]:
            raise ValueError(
                f"Invalid model_type for {classifier}. Expected one of: {list(self.classifier_model_mapping[classifier].keys())}")

        self.model_path = self.classifier_model_mapping[classifier][model_type]

        # Create the API endpoint from the model path
        self.API_URL = f"https://api-inference.huggingface.co/models/{self.model_path}"

        # Never hard-code the token in this file; it is supplied through the
        # environment. Without a token no Authorization header is sent, rather
        # than sending the literal string "Bearer None".
        API_token = os.getenv("BIAS_DETECTOR_API_KEY")
        self.headers = {"Authorization": f"Bearer {API_token}"} if API_token else {}

    def query(self, payload, max_retries=5, wait_time=5, timeout=60):
        """
        Sends a payload to the model endpoint, retrying on temporary failures.

        Args:
            payload (dict): The JSON body to post.
            max_retries (int): How many times to retry after the first attempt.
            wait_time (int): Seconds to wait before retrying when the service
                is unavailable.
            timeout (float): Seconds to wait for each HTTP request before
                giving up on it.

        Returns:
            The decoded JSON response of the API.

        Raises:
            RuntimeError: If no model exists for this detector, if the API
                answers with an error or a non-JSON body, or if the request
                still fails after all retries.
        """
        if self.model_path is None:
            # Fail early with a clear message instead of posting to ".../models/None".
            raise RuntimeError(
                f"Error: no model is available for classifier '{self.classifier}' "
                f"and model_type '{self.model_type}'.")

        last_error = "Service Unavailable"

        for attempt in range(max_retries + 1):
            http_response = requests.post(self.API_URL, headers=self.headers, json=payload, timeout=timeout)
            try:
                response = http_response.json()
            except ValueError:
                # The body was not JSON (for example an HTML error page).
                raise RuntimeError(
                    f"Error: unexpected non-JSON response (HTTP {http_response.status_code})") from None

            # Successful responses are not dictionaries carrying an 'error' key.
            if not (isinstance(response, dict) and 'error' in response):
                return response

            if 'estimated_time' in response:
                # If the model is loading, wait for the estimated time and retry
                delay = response['estimated_time']
                message = f"Model is currently loading. Waiting for {delay} seconds."
            elif response['error'] == "Service Unavailable":
                # If the service is unavailable, wait for some time and retry
                delay = wait_time
                message = f"Service is unavailable. Waiting for {wait_time} seconds before retrying..."
            else:
                # If any other error is received, raise a RuntimeError
                raise RuntimeError(f"Error: {response['error']}")

            last_error = response['error']
            # Do not wait after the final attempt; nothing will be retried.
            if attempt < max_retries:
                print(message)
                time.sleep(delay)

        # If the maximum number of retries has been reached and the request is still failing, raise a RuntimeError
        raise RuntimeError(f"Error: {last_error}. Failed after {max_retries} retries.")

    def _parse_prediction(self, prediction):
        """Converts the raw API prediction for one text into a result dictionary."""
        if self.classifier == 'Token':
            result = {}
            for item in prediction:
                # A word can occur several times in a text; merge its labels
                # instead of discarding earlier occurrences. The same label on
                # the same word keeps the score of the last occurrence.
                result.setdefault(item['word'], {})[item['entity_group']] = item['score']
            return result

        if self.model_type == 'All':
            label_mapping = self.MD_SL_label_mapping
        else:
            label_mapping = self.SD_SL_label_mapping
        # Only the part of the raw label after the last '__' is looked up.
        return {label_mapping.get(item['label'].split('__')[-1], 'unknown'): item['score'] for item in prediction}

    def predict(self, texts: List[str]):
        """
        Predicts the bias of the given text or list of texts.

        Args:
            texts (List[str]): A list of strings to analyze. A single string
                is treated as a list with one element.

        Returns:
            A list with one dictionary per text, each mapping the text to its
            result. For the "Token" classifier the result maps each word to
            ``{label: score}``; for the "Sentence" classifier it maps each
            label to its score. If the API reported an error for a text, the
            result is ``{'error': message}``. An empty input gives an empty list.

        Raises:
            ValueError: If an element of ``texts`` is not a string.
            RuntimeError: If the API request fails.
        """
        if isinstance(texts, str):
            # Iterating a bare string would treat every character as a text.
            texts = [texts]
        texts = list(texts)

        if not all(isinstance(text, str) for text in texts):
            raise ValueError("All elements in 'texts' should be of str type")

        if not texts:
            # Nothing to analyze, so no request is needed.
            return []

        # Prepare the payload
        payload = {"inputs": texts}

        # Query the API
        try:
            predictions = self.query(payload)
        except RuntimeError:
            print("Prediction failed due to a RuntimeError")
            raise
        except Exception:
            print("Prediction failed due to an unknown error")
            raise

        # Process the predictions
        results = []
        for text, prediction in zip(texts, predictions):
            # Check if an error occurred for this particular text
            if isinstance(prediction, dict) and 'error' in prediction:
                print(f"Error for text '{text}': {prediction['error']}")
                results.append({text: {'error': prediction['error']}})
            else:
                results.append({text: self._parse_prediction(prediction)})

        return results
155
+
156
+
157
if __name__ == '__main__':
    # Manual smoke test: classify two sample sentences with the sentence-level
    # gender model, then print every result and the result of the second one.
    sample_texts = [
        "The girl performed poorly at reading in school.",
        "Sir is an address showing respect for a person. It usually refers to a male.",
    ]
    gender_detector = Detector("Sentence", "Gender")
    outcome = gender_detector.predict(sample_texts)
    print(outcome)
    second_text = sample_texts[1]
    print(outcome[1][second_text])
requirements ADDED
@@ -0,0 +1 @@
 
 
1
+ requests