wu981526092 committed on
Commit
8cb1d23
1 Parent(s): 3ef2f27
.idea/Multidimensional_Multilevel_Bias_Detection.iml CHANGED
@@ -4,7 +4,7 @@
4
  <content url="file://$MODULE_DIR$">
5
  <excludeFolder url="file://$MODULE_DIR$/venv" />
6
  </content>
7
- <orderEntry type="inheritedJdk" />
8
  <orderEntry type="sourceFolder" forTests="false" />
9
  </component>
10
  </module>
 
4
  <content url="file://$MODULE_DIR$">
5
  <excludeFolder url="file://$MODULE_DIR$/venv" />
6
  </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.9 (venv) (15)" jdkType="Python SDK" />
8
  <orderEntry type="sourceFolder" forTests="false" />
9
  </component>
10
  </module>
.idea/jupyter-settings.xml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="JupyterPersistentConnectionParameters">
4
+ <option name="knownRemoteServers">
5
+ <list>
6
+ <JupyterConnectionParameters>
7
+ <option name="authType" value="notebook" />
8
+ <option name="token" value="99e27d08fa1f41223c14ce9f772e8d6f8984c33a5be25bea" />
9
+ <option name="urlString" value="http://localhost:8889/" />
10
+ <authParams2>
11
+ <map>
12
+ <entry key="token" value="99e27d08fa1f41223c14ce9f772e8d6f8984c33a5be25bea" />
13
+ </map>
14
+ </authParams2>
15
+ </JupyterConnectionParameters>
16
+ <JupyterConnectionParameters>
17
+ <option name="authType" value="notebook" />
18
+ <option name="token" value="6fd8dea83b554f6167f9697959d23f57f4c75641d1c87a3e" />
19
+ <option name="urlString" value="http://localhost:8890/" />
20
+ <authParams2>
21
+ <map>
22
+ <entry key="token" value="6fd8dea83b554f6167f9697959d23f57f4c75641d1c87a3e" />
23
+ </map>
24
+ </authParams2>
25
+ </JupyterConnectionParameters>
26
+ </list>
27
+ </option>
28
+ <option name="moduleParameters">
29
+ <map>
30
+ <entry key="$PROJECT_DIR$/../foundationsofai-main/notebooks/.idea/notebooks.iml">
31
+ <value>
32
+ <JupyterConnectionParameters>
33
+ <option name="managed" value="true" />
34
+ </JupyterConnectionParameters>
35
+ </value>
36
+ </entry>
37
+ <entry key="$PROJECT_DIR$/../turtle_face_recognition_tutorial/.idea/turtle_face_recognition_tutorial.iml">
38
+ <value>
39
+ <JupyterConnectionParameters>
40
+ <option name="managed" value="true" />
41
+ </JupyterConnectionParameters>
42
+ </value>
43
+ </entry>
44
+ </map>
45
+ </option>
46
+ </component>
47
+ </project>
.idea/misc.xml CHANGED
@@ -1,4 +1,4 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Multidimensional_Multilevel_Bias_Detection)" project-jdk-type="Python SDK" />
4
  </project>
 
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (venv) (15)" project-jdk-type="Python SDK" />
4
  </project>
bias_detector/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (260 Bytes). View file
 
bias_detector/__pycache__/bias_detector.cpython-311.pyc ADDED
Binary file (5.42 kB). View file
 
bias_detector/bias_detector.py CHANGED
@@ -1,7 +1,7 @@
1
- import time
2
- import requests
3
  from typing import List
 
4
  import os
 
5
  class Detector:
6
  """
7
  A class for detecting various forms of bias in text using pre-trained models.
@@ -15,8 +15,8 @@ class Detector:
15
  classifier (str): The type of classifier to use.
16
  model_type (str): The type of the model to use.
17
  """
18
-
19
- # Maps classifiers to their available models
20
  self.classifier_model_mapping = {
21
  "Token": {
22
  "All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
@@ -34,9 +34,6 @@ class Detector:
34
  }
35
  }
36
 
37
- self.classifier = classifier
38
- self.model_type = model_type
39
-
40
  if classifier not in self.classifier_model_mapping:
41
  raise ValueError(f"Invalid classifier. Expected one of: {list(self.classifier_model_mapping.keys())}")
42
 
@@ -45,43 +42,16 @@ class Detector:
45
  f"Invalid model_type for {classifier}. Expected one of: {list(self.classifier_model_mapping[classifier].keys())}")
46
 
47
  self.model_path = self.classifier_model_mapping[classifier][model_type]
 
 
 
 
 
 
 
 
 
48
 
49
- # Create the API endpoint from the model path
50
- self.API_URL = f"https://api-inference.huggingface.co/models/{self.model_path}"
51
- API_token = os.getenv("BIAS_DETECTOR_API_KEY")
52
- #API_token = "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"
53
-
54
- # Add authorization token (if required)
55
- self.headers = {"Authorization": f"Bearer {API_token}"} # Replace `your_api_token` with your token
56
-
57
- def query(self, payload, max_retries=5, wait_time=5):
58
- retries = 0
59
-
60
- while retries <= max_retries:
61
- response = requests.post(self.API_URL, headers=self.headers, json=payload).json()
62
-
63
- # If the model is loading, wait for the estimated time and retry
64
- if 'error' in response and 'estimated_time' in response:
65
- print(f"Model is currently loading. Waiting for {response['estimated_time']} seconds.")
66
- time.sleep(response['estimated_time'])
67
- retries += 1
68
- continue
69
-
70
- # If the service is unavailable, wait for some time and retry
71
- if 'error' in response and response['error'] == "Service Unavailable":
72
- print(f"Service is unavailable. Waiting for {wait_time} seconds before retrying...")
73
- time.sleep(wait_time)
74
- retries += 1
75
- continue
76
-
77
- # If any other error is received, raise a RuntimeError
78
- if 'error' in response:
79
- raise RuntimeError(f"Error: {response['error']}")
80
-
81
- return response
82
-
83
- # If the maximum number of retries has been reached and the request is still failing, raise a RuntimeError
84
- raise RuntimeError(f"Error: Service Unavailable. Failed after {max_retries} retries.")
85
 
86
  def predict(self, texts: List[str]):
87
  """
@@ -97,38 +67,15 @@ class Detector:
97
  raise ValueError("All elements in 'texts' should be of str type")
98
 
99
  results = []
100
-
101
- # Prepare the payload
102
- payload = {"inputs": texts}
103
-
104
- # Query the API
105
- try:
106
- predictions = self.query(payload)
107
- except RuntimeError as e:
108
- print("Prediction failed due to a RuntimeError")
109
- raise e
110
- except Exception as e:
111
- print("Prediction failed due to an unknown error")
112
- raise e
113
-
114
- # Process the predictions
115
  for text, prediction in zip(texts, predictions):
116
- # Check if an error occurred while loading the model
117
- if 'error' in prediction:
118
- print(f"Error for text '{text}': {prediction['error']}")
119
- results.append({text: {'error': prediction['error']}})
120
- else:
121
- result = {}
122
- if self.classifier == 'Token':
123
- for item in prediction:
124
- result[item['word']] = {item['entity_group']: item['score']}
125
- elif self.classifier == 'Sentence' and self.model_type == 'All':
126
- result = {item['label'].split('__')[-1]: item['score'] for item in
127
- prediction}
128
- else:
129
- result = {item['label'].split('__')[-1]: item['score'] for item in
130
- prediction}
131
- results.append({text: result})
132
 
133
  return results
134
 
@@ -138,4 +85,4 @@ if __name__ == '__main__':
138
  test_data = ["The girl performed poorly at reading in school.","Sir is an address showing respect for a person. It usually refers to a male."]
139
  result = detector_test.predict(test_data)
140
  print(result)
141
- print(result[1][test_data[1]])
 
 
 
1
  from typing import List
2
+ from transformers import pipeline, AutoTokenizer
3
  import os
4
+
5
  class Detector:
6
  """
7
  A class for detecting various forms of bias in text using pre-trained models.
 
15
  classifier (str): The type of classifier to use.
16
  model_type (str): The type of the model to use.
17
  """
18
+ self.classifier = classifier
19
+ self.model_type = model_type
20
  self.classifier_model_mapping = {
21
  "Token": {
22
  "All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
 
34
  }
35
  }
36
 
 
 
 
37
  if classifier not in self.classifier_model_mapping:
38
  raise ValueError(f"Invalid classifier. Expected one of: {list(self.classifier_model_mapping.keys())}")
39
 
 
42
  f"Invalid model_type for {classifier}. Expected one of: {list(self.classifier_model_mapping[classifier].keys())}")
43
 
44
  self.model_path = self.classifier_model_mapping[classifier][model_type]
45
+ #API_token = os.getenv("BIAS_DETECTOR_API_KEY")
46
+ API_token = "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"
47
+ # Using pipeline for inference
48
+ if classifier == 'Token':
49
+ task_type = "ner"
50
+ self.model = pipeline(task_type, model=self.model_path, tokenizer=AutoTokenizer.from_pretrained(self.model_path,use_auth_token=API_token),use_auth_token=API_token)
51
+ else:
52
+ task_type = "text-classification"
53
+ self.model = pipeline(task_type, model=self.model_path, tokenizer=AutoTokenizer.from_pretrained(self.model_path,use_auth_token=API_token),use_auth_token=API_token,return_all_scores= True)
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def predict(self, texts: List[str]):
57
  """
 
67
  raise ValueError("All elements in 'texts' should be of str type")
68
 
69
  results = []
70
+ predictions = self.model(texts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  for text, prediction in zip(texts, predictions):
72
+ result = {}
73
+ if self.classifier == 'Token':
74
+ for item in prediction:
75
+ result[item['word']] = {item['entity']: item['score']}
76
+ elif self.classifier == 'Sentence':
77
+ result = {item['label'].split('__')[-1]: item['score'] for item in prediction}
78
+ results.append({text: result})
 
 
 
 
 
 
 
 
 
79
 
80
  return results
81
 
 
85
  test_data = ["The girl performed poorly at reading in school.","Sir is an address showing respect for a person. It usually refers to a male."]
86
  result = detector_test.predict(test_data)
87
  print(result)
88
+ print(result[1][test_data[1]])
requirements CHANGED
@@ -1 +1,4 @@
1
- requests
 
 
 
 
1
+ requests
2
+ transformers
3
+ torch
4
+ xformers