Spaces:
Runtime error
Runtime error
Commit
•
8cb1d23
1
Parent(s):
3ef2f27
update
Browse files
.idea/Multidimensional_Multilevel_Bias_Detection.iml
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
<content url="file://$MODULE_DIR$">
|
5 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
6 |
</content>
|
7 |
-
<orderEntry type="
|
8 |
<orderEntry type="sourceFolder" forTests="false" />
|
9 |
</component>
|
10 |
</module>
|
|
|
4 |
<content url="file://$MODULE_DIR$">
|
5 |
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
6 |
</content>
|
7 |
+
<orderEntry type="jdk" jdkName="Python 3.9 (venv) (15)" jdkType="Python SDK" />
|
8 |
<orderEntry type="sourceFolder" forTests="false" />
|
9 |
</component>
|
10 |
</module>
|
.idea/jupyter-settings.xml
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="JupyterPersistentConnectionParameters">
|
4 |
+
<option name="knownRemoteServers">
|
5 |
+
<list>
|
6 |
+
<JupyterConnectionParameters>
|
7 |
+
<option name="authType" value="notebook" />
|
8 |
+
<option name="token" value="99e27d08fa1f41223c14ce9f772e8d6f8984c33a5be25bea" />
|
9 |
+
<option name="urlString" value="http://localhost:8889/" />
|
10 |
+
<authParams2>
|
11 |
+
<map>
|
12 |
+
<entry key="token" value="99e27d08fa1f41223c14ce9f772e8d6f8984c33a5be25bea" />
|
13 |
+
</map>
|
14 |
+
</authParams2>
|
15 |
+
</JupyterConnectionParameters>
|
16 |
+
<JupyterConnectionParameters>
|
17 |
+
<option name="authType" value="notebook" />
|
18 |
+
<option name="token" value="6fd8dea83b554f6167f9697959d23f57f4c75641d1c87a3e" />
|
19 |
+
<option name="urlString" value="http://localhost:8890/" />
|
20 |
+
<authParams2>
|
21 |
+
<map>
|
22 |
+
<entry key="token" value="6fd8dea83b554f6167f9697959d23f57f4c75641d1c87a3e" />
|
23 |
+
</map>
|
24 |
+
</authParams2>
|
25 |
+
</JupyterConnectionParameters>
|
26 |
+
</list>
|
27 |
+
</option>
|
28 |
+
<option name="moduleParameters">
|
29 |
+
<map>
|
30 |
+
<entry key="$PROJECT_DIR$/../foundationsofai-main/notebooks/.idea/notebooks.iml">
|
31 |
+
<value>
|
32 |
+
<JupyterConnectionParameters>
|
33 |
+
<option name="managed" value="true" />
|
34 |
+
</JupyterConnectionParameters>
|
35 |
+
</value>
|
36 |
+
</entry>
|
37 |
+
<entry key="$PROJECT_DIR$/../turtle_face_recognition_tutorial/.idea/turtle_face_recognition_tutorial.iml">
|
38 |
+
<value>
|
39 |
+
<JupyterConnectionParameters>
|
40 |
+
<option name="managed" value="true" />
|
41 |
+
</JupyterConnectionParameters>
|
42 |
+
</value>
|
43 |
+
</entry>
|
44 |
+
</map>
|
45 |
+
</option>
|
46 |
+
</component>
|
47 |
+
</project>
|
.idea/misc.xml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (
|
4 |
</project>
|
|
|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (venv) (15)" project-jdk-type="Python SDK" />
|
4 |
</project>
|
bias_detector/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (260 Bytes). View file
|
|
bias_detector/__pycache__/bias_detector.cpython-311.pyc
ADDED
Binary file (5.42 kB). View file
|
|
bias_detector/bias_detector.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
import time
|
2 |
-
import requests
|
3 |
from typing import List
|
|
|
4 |
import os
|
|
|
5 |
class Detector:
|
6 |
"""
|
7 |
A class for detecting various forms of bias in text using pre-trained models.
|
@@ -15,8 +15,8 @@ class Detector:
|
|
15 |
classifier (str): The type of classifier to use.
|
16 |
model_type (str): The type of the model to use.
|
17 |
"""
|
18 |
-
|
19 |
-
|
20 |
self.classifier_model_mapping = {
|
21 |
"Token": {
|
22 |
"All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
|
@@ -34,9 +34,6 @@ class Detector:
|
|
34 |
}
|
35 |
}
|
36 |
|
37 |
-
self.classifier = classifier
|
38 |
-
self.model_type = model_type
|
39 |
-
|
40 |
if classifier not in self.classifier_model_mapping:
|
41 |
raise ValueError(f"Invalid classifier. Expected one of: {list(self.classifier_model_mapping.keys())}")
|
42 |
|
@@ -45,43 +42,16 @@ class Detector:
|
|
45 |
f"Invalid model_type for {classifier}. Expected one of: {list(self.classifier_model_mapping[classifier].keys())}")
|
46 |
|
47 |
self.model_path = self.classifier_model_mapping[classifier][model_type]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
# Create the API endpoint from the model path
|
50 |
-
self.API_URL = f"https://api-inference.huggingface.co/models/{self.model_path}"
|
51 |
-
API_token = os.getenv("BIAS_DETECTOR_API_KEY")
|
52 |
-
#API_token = "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"
|
53 |
-
|
54 |
-
# Add authorization token (if required)
|
55 |
-
self.headers = {"Authorization": f"Bearer {API_token}"} # Replace `your_api_token` with your token
|
56 |
-
|
57 |
-
def query(self, payload, max_retries=5, wait_time=5):
|
58 |
-
retries = 0
|
59 |
-
|
60 |
-
while retries <= max_retries:
|
61 |
-
response = requests.post(self.API_URL, headers=self.headers, json=payload).json()
|
62 |
-
|
63 |
-
# If the model is loading, wait for the estimated time and retry
|
64 |
-
if 'error' in response and 'estimated_time' in response:
|
65 |
-
print(f"Model is currently loading. Waiting for {response['estimated_time']} seconds.")
|
66 |
-
time.sleep(response['estimated_time'])
|
67 |
-
retries += 1
|
68 |
-
continue
|
69 |
-
|
70 |
-
# If the service is unavailable, wait for some time and retry
|
71 |
-
if 'error' in response and response['error'] == "Service Unavailable":
|
72 |
-
print(f"Service is unavailable. Waiting for {wait_time} seconds before retrying...")
|
73 |
-
time.sleep(wait_time)
|
74 |
-
retries += 1
|
75 |
-
continue
|
76 |
-
|
77 |
-
# If any other error is received, raise a RuntimeError
|
78 |
-
if 'error' in response:
|
79 |
-
raise RuntimeError(f"Error: {response['error']}")
|
80 |
-
|
81 |
-
return response
|
82 |
-
|
83 |
-
# If the maximum number of retries has been reached and the request is still failing, raise a RuntimeError
|
84 |
-
raise RuntimeError(f"Error: Service Unavailable. Failed after {max_retries} retries.")
|
85 |
|
86 |
def predict(self, texts: List[str]):
|
87 |
"""
|
@@ -97,38 +67,15 @@ class Detector:
|
|
97 |
raise ValueError("All elements in 'texts' should be of str type")
|
98 |
|
99 |
results = []
|
100 |
-
|
101 |
-
# Prepare the payload
|
102 |
-
payload = {"inputs": texts}
|
103 |
-
|
104 |
-
# Query the API
|
105 |
-
try:
|
106 |
-
predictions = self.query(payload)
|
107 |
-
except RuntimeError as e:
|
108 |
-
print("Prediction failed due to a RuntimeError")
|
109 |
-
raise e
|
110 |
-
except Exception as e:
|
111 |
-
print("Prediction failed due to an unknown error")
|
112 |
-
raise e
|
113 |
-
|
114 |
-
# Process the predictions
|
115 |
for text, prediction in zip(texts, predictions):
|
116 |
-
|
117 |
-
if '
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
result = {}
|
122 |
-
|
123 |
-
for item in prediction:
|
124 |
-
result[item['word']] = {item['entity_group']: item['score']}
|
125 |
-
elif self.classifier == 'Sentence' and self.model_type == 'All':
|
126 |
-
result = {item['label'].split('__')[-1]: item['score'] for item in
|
127 |
-
prediction}
|
128 |
-
else:
|
129 |
-
result = {item['label'].split('__')[-1]: item['score'] for item in
|
130 |
-
prediction}
|
131 |
-
results.append({text: result})
|
132 |
|
133 |
return results
|
134 |
|
@@ -138,4 +85,4 @@ if __name__ == '__main__':
|
|
138 |
test_data = ["The girl performed poorly at reading in school.","Sir is an address showing respect for a person. It usually refers to a male."]
|
139 |
result = detector_test.predict(test_data)
|
140 |
print(result)
|
141 |
-
print(result[1][test_data[1]])
|
|
|
|
|
|
|
1 |
from typing import List
|
2 |
+
from transformers import pipeline, AutoTokenizer
|
3 |
import os
|
4 |
+
|
5 |
class Detector:
|
6 |
"""
|
7 |
A class for detecting various forms of bias in text using pre-trained models.
|
|
|
15 |
classifier (str): The type of classifier to use.
|
16 |
model_type (str): The type of the model to use.
|
17 |
"""
|
18 |
+
self.classifier = classifier
|
19 |
+
self.model_type = model_type
|
20 |
self.classifier_model_mapping = {
|
21 |
"Token": {
|
22 |
"All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
|
|
|
34 |
}
|
35 |
}
|
36 |
|
|
|
|
|
|
|
37 |
if classifier not in self.classifier_model_mapping:
|
38 |
raise ValueError(f"Invalid classifier. Expected one of: {list(self.classifier_model_mapping.keys())}")
|
39 |
|
|
|
42 |
f"Invalid model_type for {classifier}. Expected one of: {list(self.classifier_model_mapping[classifier].keys())}")
|
43 |
|
44 |
self.model_path = self.classifier_model_mapping[classifier][model_type]
|
45 |
+
#API_token = os.getenv("BIAS_DETECTOR_API_KEY")
|
46 |
+
API_token = "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"
|
47 |
+
# Using pipeline for inference
|
48 |
+
if classifier == 'Token':
|
49 |
+
task_type = "ner"
|
50 |
+
self.model = pipeline(task_type, model=self.model_path, tokenizer=AutoTokenizer.from_pretrained(self.model_path,use_auth_token=API_token),use_auth_token=API_token)
|
51 |
+
else:
|
52 |
+
task_type = "text-classification"
|
53 |
+
self.model = pipeline(task_type, model=self.model_path, tokenizer=AutoTokenizer.from_pretrained(self.model_path,use_auth_token=API_token),use_auth_token=API_token,return_all_scores= True)
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
def predict(self, texts: List[str]):
|
57 |
"""
|
|
|
67 |
raise ValueError("All elements in 'texts' should be of str type")
|
68 |
|
69 |
results = []
|
70 |
+
predictions = self.model(texts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
for text, prediction in zip(texts, predictions):
|
72 |
+
result = {}
|
73 |
+
if self.classifier == 'Token':
|
74 |
+
for item in prediction:
|
75 |
+
result[item['word']] = {item['entity']: item['score']}
|
76 |
+
elif self.classifier == 'Sentence':
|
77 |
+
result = {item['label'].split('__')[-1]: item['score'] for item in prediction}
|
78 |
+
results.append({text: result})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
return results
|
81 |
|
|
|
85 |
test_data = ["The girl performed poorly at reading in school.","Sir is an address showing respect for a person. It usually refers to a male."]
|
86 |
result = detector_test.predict(test_data)
|
87 |
print(result)
|
88 |
+
print(result[1][test_data[1]])
|
requirements
CHANGED
@@ -1 +1,4 @@
|
|
1 |
-
requests
|
|
|
|
|
|
|
|
1 |
+
requests
|
2 |
+
transformers
|
3 |
+
torch
|
4 |
+
xformers
|