Spaces:

wu981526092
/

Stereotype_Detection

Runtime error

App Files Files Community

wu981526092 committed on Jul 17, 2023

Commit

8cb1d23

•

1 Parent(s): 3ef2f27

update

Browse files

Files changed (7) hide show

.idea/Multidimensional_Multilevel_Bias_Detection.iml +1 -1
.idea/jupyter-settings.xml +47 -0
.idea/misc.xml +1 -1
bias_detector/__pycache__/__init__.cpython-311.pyc +0 -0
bias_detector/__pycache__/bias_detector.cpython-311.pyc +0 -0
bias_detector/bias_detector.py +22 -75
requirements +4 -1

.idea/Multidimensional_Multilevel_Bias_Detection.iml CHANGED Viewed

@@ -4,7 +4,7 @@
     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
-    <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>

     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/venv" />
     </content>
+    <orderEntry type="jdk" jdkName="Python 3.9 (venv) (15)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>

.idea/jupyter-settings.xml ADDED Viewed

	@@ -0,0 +1,47 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="JupyterPersistentConnectionParameters">
+    <option name="knownRemoteServers">
+      <list>
+        <JupyterConnectionParameters>
+          <option name="authType" value="notebook" />
+          <option name="token" value="99e27d08fa1f41223c14ce9f772e8d6f8984c33a5be25bea" />
+          <option name="urlString" value="http://localhost:8889/" />
+          <authParams2>
+            <map>
+              <entry key="token" value="99e27d08fa1f41223c14ce9f772e8d6f8984c33a5be25bea" />
+            </map>
+          </authParams2>
+        </JupyterConnectionParameters>
+        <JupyterConnectionParameters>
+          <option name="authType" value="notebook" />
+          <option name="token" value="6fd8dea83b554f6167f9697959d23f57f4c75641d1c87a3e" />
+          <option name="urlString" value="http://localhost:8890/" />
+          <authParams2>
+            <map>
+              <entry key="token" value="6fd8dea83b554f6167f9697959d23f57f4c75641d1c87a3e" />
+            </map>
+          </authParams2>
+        </JupyterConnectionParameters>
+      </list>
+    </option>
+    <option name="moduleParameters">
+      <map>
+        <entry key="$PROJECT_DIR$/../foundationsofai-main/notebooks/.idea/notebooks.iml">
+          <value>
+            <JupyterConnectionParameters>
+              <option name="managed" value="true" />
+            </JupyterConnectionParameters>
+          </value>
+        </entry>
+        <entry key="$PROJECT_DIR$/../turtle_face_recognition_tutorial/.idea/turtle_face_recognition_tutorial.iml">
+          <value>
+            <JupyterConnectionParameters>
+              <option name="managed" value="true" />
+            </JupyterConnectionParameters>
+          </value>
+        </entry>
+      </map>
+    </option>
+  </component>
+</project>

.idea/misc.xml CHANGED Viewed

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (Multidimensional_Multilevel_Bias_Detection)" project-jdk-type="Python SDK" />
 </project>

 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (venv) (15)" project-jdk-type="Python SDK" />
 </project>

bias_detector/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (260 Bytes). View file

bias_detector/__pycache__/bias_detector.cpython-311.pyc ADDED Viewed

Binary file (5.42 kB). View file

bias_detector/bias_detector.py CHANGED Viewed

@@ -1,7 +1,7 @@
-import time
-import requests
 from typing import List
 import os
 class Detector:
     """
     A class for detecting various forms of bias in text using pre-trained models.
@@ -15,8 +15,8 @@ class Detector:
             classifier (str): The type of classifier to use.
             model_type (str): The type of the model to use.
         """
-        # Maps classifiers to their available models
         self.classifier_model_mapping = {
                 "Token": {
                     "All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
@@ -34,9 +34,6 @@ class Detector:
                 }
         }
-        self.classifier = classifier
-        self.model_type = model_type
         if classifier not in self.classifier_model_mapping:
             raise ValueError(f"Invalid classifier. Expected one of: {list(self.classifier_model_mapping.keys())}")
@@ -45,43 +42,16 @@ class Detector:
                 f"Invalid model_type for {classifier}. Expected one of: {list(self.classifier_model_mapping[classifier].keys())}")
         self.model_path = self.classifier_model_mapping[classifier][model_type]
-        # Create the API endpoint from the model path
-        self.API_URL = f"https://api-inference.huggingface.co/models/{self.model_path}"
-        API_token = os.getenv("BIAS_DETECTOR_API_KEY")
-        #API_token = "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"
-        # Add authorization token (if required)
-        self.headers = {"Authorization": f"Bearer {API_token}"} # Replace `your_api_token` with your token
-    def query(self, payload, max_retries=5, wait_time=5):
-        retries = 0
-        while retries <= max_retries:
-            response = requests.post(self.API_URL, headers=self.headers, json=payload).json()
-            # If the model is loading, wait for the estimated time and retry
-            if 'error' in response and 'estimated_time' in response:
-                print(f"Model is currently loading. Waiting for {response['estimated_time']} seconds.")
-                time.sleep(response['estimated_time'])
-                retries += 1
-                continue
-            # If the service is unavailable, wait for some time and retry
-            if 'error' in response and response['error'] == "Service Unavailable":
-                print(f"Service is unavailable. Waiting for {wait_time} seconds before retrying...")
-                time.sleep(wait_time)
-                retries += 1
-                continue
-            # If any other error is received, raise a RuntimeError
-            if 'error' in response:
-                raise RuntimeError(f"Error: {response['error']}")
-            return response
-        # If the maximum number of retries has been reached and the request is still failing, raise a RuntimeError
-        raise RuntimeError(f"Error: Service Unavailable. Failed after {max_retries} retries.")
     def predict(self, texts: List[str]):
         """
@@ -97,38 +67,15 @@ class Detector:
             raise ValueError("All elements in 'texts' should be of str type")
         results = []
-        # Prepare the payload
-        payload = {"inputs": texts}
-        # Query the API
-        try:
-            predictions = self.query(payload)
-        except RuntimeError as e:
-            print("Prediction failed due to a RuntimeError")
-            raise e
-        except Exception as e:
-            print("Prediction failed due to an unknown error")
-            raise e
-        # Process the predictions
         for text, prediction in zip(texts, predictions):
-            # Check if an error occurred while loading the model
-            if 'error' in prediction:
-                print(f"Error for text '{text}': {prediction['error']}")
-                results.append({text: {'error': prediction['error']}})
-            else:
-                result = {}
-                if self.classifier == 'Token':
-                    for item in prediction:
-                        result[item['word']] = {item['entity_group']: item['score']}
-                elif self.classifier == 'Sentence' and self.model_type == 'All':
-                    result = {item['label'].split('__')[-1]: item['score'] for item in
-                              prediction}
-                else:
-                    result = {item['label'].split('__')[-1]: item['score'] for item in
-                              prediction}
-                results.append({text: result})
         return results
@@ -138,4 +85,4 @@ if __name__ == '__main__':
     test_data = ["The girl performed poorly at reading in school.","Sir is an address showing respect for a person. It usually refers to a male."]
     result = detector_test.predict(test_data)
     print(result)
-    print(result[1][test_data[1]])

 from typing import List
+from transformers import pipeline, AutoTokenizer
 import os
 class Detector:
     """
     A class for detecting various forms of bias in text using pre-trained models.
             classifier (str): The type of classifier to use.
             model_type (str): The type of the model to use.
         """
+        self.classifier = classifier
+        self.model_type = model_type
         self.classifier_model_mapping = {
                 "Token": {
                     "All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
                 }
         }
         if classifier not in self.classifier_model_mapping:
             raise ValueError(f"Invalid classifier. Expected one of: {list(self.classifier_model_mapping.keys())}")
                 f"Invalid model_type for {classifier}. Expected one of: {list(self.classifier_model_mapping[classifier].keys())}")
         self.model_path = self.classifier_model_mapping[classifier][model_type]
+        #API_token = os.getenv("BIAS_DETECTOR_API_KEY")
+        API_token = "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"
+        # Using pipeline for inference
+        if classifier == 'Token':
+            task_type = "ner"
+            self.model = pipeline(task_type, model=self.model_path, tokenizer=AutoTokenizer.from_pretrained(self.model_path,use_auth_token=API_token),use_auth_token=API_token)
+        else:
+            task_type = "text-classification"
+            self.model = pipeline(task_type, model=self.model_path, tokenizer=AutoTokenizer.from_pretrained(self.model_path,use_auth_token=API_token),use_auth_token=API_token,return_all_scores= True)
     def predict(self, texts: List[str]):
         """
             raise ValueError("All elements in 'texts' should be of str type")
         results = []
+        predictions = self.model(texts)
         for text, prediction in zip(texts, predictions):
+            result = {}
+            if self.classifier == 'Token':
+                for item in prediction:
+                    result[item['word']] = {item['entity']: item['score']}
+            elif self.classifier == 'Sentence':
+                result = {item['label'].split('__')[-1]: item['score'] for item in prediction}
+            results.append({text: result})
         return results
     test_data = ["The girl performed poorly at reading in school.","Sir is an address showing respect for a person. It usually refers to a male."]
     result = detector_test.predict(test_data)
     print(result)
+    print(result[1][test_data[1]])

requirements CHANGED Viewed

	@@ -1 +1,4 @@
-requests

+requests
+transformers
+torch
+xformers