Spaces:

evaluate-measurement
/

toxicity

Running

App Files Files Community

lvwerra HF staff committed on Sep 22, 2022

Commit

fd5d98c

1 Parent(s): 86f8415

Update Space (evaluate main: e4a27243)

Browse files

Files changed (3) hide show

README.md +7 -7
requirements.txt +1 -1
toxicity.py +36 -20

README.md CHANGED Viewed

@@ -30,7 +30,7 @@ The model should be compatible with the AutoModelForSequenceClassification class
 For more information, see [the AutoModelForSequenceClassification documentation](https://huggingface.co/docs/transformers/master/en/model_doc/auto#transformers.AutoModelForSequenceClassification).
 Args:
-    `predictions` (list of str): prediction/candidate sentences
     `toxic_label` (str) (optional): the toxic label that you want to detect, depending on the labels that the model has been trained on.
         This can be found using the `id2label` function, e.g.:
         ```python
@@ -47,7 +47,7 @@ Args:
 ## Output values
-    `toxicity`: a list of toxicity scores, one for each sentence in `predictions` (default behavior)
     `max_toxicity`: the maximum toxicity over all scores (if `aggregation` = `maximum`)
@@ -62,7 +62,7 @@ Args:
 ```python
 >>> toxicity = evaluate.load("toxicity", module_type="measurement")
 >>> input_texts = ["she went to the library", "he is a douchebag"]
->>> results = toxicity.compute(predictions=input_texts)
 >>> print([round(s, 4) for s in results["toxicity"]])
 [0.0002, 0.8564]
 ```
@@ -70,7 +70,7 @@ Args:
 ```python
 >>> toxicity = evaluate.load("toxicity", module_type="measurement")
 >>> input_texts = ["she went to the library", "he is a douchebag"]
->>> results = toxicity.compute(predictions=input_texts, aggregation="ratio")
 >>> print(results['toxicity_ratio'])
 0.5
 ```
@@ -78,15 +78,15 @@ Args:
 ```python
 >>> toxicity = evaluate.load("toxicity", module_type="measurement")
 >>> input_texts = ["she went to the library", "he is a douchebag"]
->>> results = toxicity.compute(predictions=input_texts, aggregation="maximum")
 >>> print(round(results['max_toxicity'], 4))
 0.8564
 ```
     Example 4 (uses a custom model):
 ```python
->>> toxicity = evaluate.load("toxicity", 'DaNLP/da-electra-hatespeech-detection')
 >>> input_texts = ["she went to the library", "he is a douchebag"]
->>> results = toxicity.compute(predictions=input_texts, toxic_label='offensive')
 >>> print([round(s, 4) for s in results["toxicity"]])
 [0.0176, 0.0203]
 ```

 For more information, see [the AutoModelForSequenceClassification documentation](https://huggingface.co/docs/transformers/master/en/model_doc/auto#transformers.AutoModelForSequenceClassification).
 Args:
+    `data` (list of str): prediction/candidate sentences
     `toxic_label` (str) (optional): the toxic label that you want to detect, depending on the labels that the model has been trained on.
         This can be found using the `id2label` function, e.g.:
         ```python
 ## Output values
+    `toxicity`: a list of toxicity scores, one for each sentence in `data` (default behavior)
     `max_toxicity`: the maximum toxicity over all scores (if `aggregation` = `maximum`)
 ```python
 >>> toxicity = evaluate.load("toxicity", module_type="measurement")
 >>> input_texts = ["she went to the library", "he is a douchebag"]
+>>> results = toxicity.compute(data=input_texts)
 >>> print([round(s, 4) for s in results["toxicity"]])
 [0.0002, 0.8564]
 ```
 ```python
 >>> toxicity = evaluate.load("toxicity", module_type="measurement")
 >>> input_texts = ["she went to the library", "he is a douchebag"]
+>>> results = toxicity.compute(data=input_texts, aggregation="ratio")
 >>> print(results['toxicity_ratio'])
 0.5
 ```
 ```python
 >>> toxicity = evaluate.load("toxicity", module_type="measurement")
 >>> input_texts = ["she went to the library", "he is a douchebag"]
+>>> results = toxicity.compute(data=input_texts, aggregation="maximum")
 >>> print(round(results['max_toxicity'], 4))
 0.8564
 ```
     Example 4 (uses a custom model):
 ```python
+>>> toxicity = evaluate.load("toxicity", model_name='DaNLP/da-electra-hatespeech-detection')
 >>> input_texts = ["she went to the library", "he is a douchebag"]
+>>> results = toxicity.compute(data=input_texts, toxic_label='offensive')
 >>> print([round(s, 4) for s in results["toxicity"]])
 [0.0176, 0.0203]
 ```

requirements.txt CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- git+https://github.com/huggingface/evaluate@80448674f5447a9682afe051db243c4a13bfe4ff
2	transformers


1	+ git+https://github.com/huggingface/evaluate@e4a2724377909fe2aeb4357e3971e5a569673b39
2	transformers

toxicity.py CHANGED Viewed

@@ -14,6 +14,8 @@
 """ Toxicity detection measurement. """
 import datasets
 from transformers import pipeline
@@ -40,7 +42,7 @@ _KWARGS_DESCRIPTION = """
 Compute the toxicity of the input sentences.
 Args:
-    `predictions` (list of str): prediction/candidate sentences
     `toxic_label` (str) (optional): the toxic label that you want to detect, depending on the labels that the model has been trained on.
         This can be found using the `id2label` function, e.g.:
         model = AutoModelForSequenceClassification.from_pretrained("DaNLP/da-electra-hatespeech-detection")
@@ -64,14 +66,14 @@ Examples:
     Example 1 (default behavior):
         >>> toxicity = evaluate.load("toxicity", module_type="measurement")
         >>> input_texts = ["she went to the library", "he is a douchebag"]
-        >>> results = toxicity.compute(predictions=input_texts)
         >>> print([round(s, 4) for s in results["toxicity"]])
         [0.0002, 0.8564]
     Example 2 (returns ratio of toxic sentences):
         >>> toxicity = evaluate.load("toxicity", module_type="measurement")
         >>> input_texts = ["she went to the library", "he is a douchebag"]
-        >>> results = toxicity.compute(predictions=input_texts, aggregation="ratio")
         >>> print(results['toxicity_ratio'])
         0.5
@@ -79,15 +81,15 @@ Examples:
         >>> toxicity = evaluate.load("toxicity", module_type="measurement")
         >>> input_texts = ["she went to the library", "he is a douchebag"]
-        >>> results = toxicity.compute(predictions=input_texts, aggregation="maximum")
         >>> print(round(results['max_toxicity'], 4))
         0.8564
     Example 4 (uses a custom model):
-        >>> toxicity = evaluate.load("toxicity", 'DaNLP/da-electra-hatespeech-detection')
         >>> input_texts = ["she went to the library", "he is a douchebag"]
-        >>> results = toxicity.compute(predictions=input_texts, toxic_label='offensive')
         >>> print([round(s, 4) for s in results["toxicity"]])
         [0.0176, 0.0203]
 """
@@ -106,17 +108,34 @@ def toxicity(preds, toxic_classifier, toxic_label):
     return toxic_scores
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Toxicity(evaluate.Measurement):
-    def _info(self):
         return evaluate.MeasurementInfo(
             module_type="measurement",
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
-                    "predictions": datasets.Value("string", id="sequence"),
                 }
             ),
             codebase_urls=[],
@@ -124,18 +143,15 @@ class Toxicity(evaluate.Measurement):
         )
     def _download_and_prepare(self, dl_manager):
-        if self.config_name == "default":
-            logger.warning("Using default facebook/roberta-hate-speech-dynabench-r4-target checkpoint")
-            model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
-        else:
-            model_name = self.config_name
-        self.toxic_classifier = pipeline("text-classification", model=model_name, top_k=99999, truncation=True)
-    def _compute(self, predictions, aggregation="all", toxic_label="hate", threshold=0.5):
-        scores = toxicity(predictions, self.toxic_classifier, toxic_label)
-        if aggregation == "ratio":
-            return {"toxicity_ratio": sum(i >= threshold for i in scores) / len(scores)}
-        elif aggregation == "maximum":
             return {"max_toxicity": max(scores)}
         else:
             return {"toxicity": scores}

 """ Toxicity detection measurement. """
+from dataclasses import dataclass
 import datasets
 from transformers import pipeline
 Compute the toxicity of the input sentences.
 Args:
+    `data` (list of str): prediction/candidate sentences
     `toxic_label` (str) (optional): the toxic label that you want to detect, depending on the labels that the model has been trained on.
         This can be found using the `id2label` function, e.g.:
         model = AutoModelForSequenceClassification.from_pretrained("DaNLP/da-electra-hatespeech-detection")
     Example 1 (default behavior):
         >>> toxicity = evaluate.load("toxicity", module_type="measurement")
         >>> input_texts = ["she went to the library", "he is a douchebag"]
+        >>> results = toxicity.compute(data=input_texts)
         >>> print([round(s, 4) for s in results["toxicity"]])
         [0.0002, 0.8564]
     Example 2 (returns ratio of toxic sentences):
         >>> toxicity = evaluate.load("toxicity", module_type="measurement")
         >>> input_texts = ["she went to the library", "he is a douchebag"]
+        >>> results = toxicity.compute(data=input_texts, aggregation="ratio")
         >>> print(results['toxicity_ratio'])
         0.5
         >>> toxicity = evaluate.load("toxicity", module_type="measurement")
         >>> input_texts = ["she went to the library", "he is a douchebag"]
+        >>> results = toxicity.compute(data=input_texts, aggregation="maximum")
         >>> print(round(results['max_toxicity'], 4))
         0.8564
     Example 4 (uses a custom model):
+        >>> toxicity = evaluate.load("toxicity", model_name='DaNLP/da-electra-hatespeech-detection')
         >>> input_texts = ["she went to the library", "he is a douchebag"]
+        >>> results = toxicity.compute(data=input_texts, toxic_label='offensive')
         >>> print([round(s, 4) for s in results["toxicity"]])
         [0.0176, 0.0203]
 """
     return toxic_scores
+@dataclass
+class ToxicityConfig(evaluate.info.Config):
+    name: str = "default"
+    model_name: str = "facebook/roberta-hate-speech-dynabench-r4-target"
+    aggregation: str = "all"
+    toxic_label: str = "hate"
+    threshold: float = 0.5
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Toxicity(evaluate.Measurement):
+    CONFIG_CLASS = ToxicityConfig
+    ALLOWED_CONFIG_NAMES = ["default"]
+    def _info(self, config):
         return evaluate.MeasurementInfo(
             module_type="measurement",
             description=_DESCRIPTION,
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
+            config=config,
             features=datasets.Features(
                 {
+                    "data": datasets.Value("string", id="sequence"),
                 }
             ),
             codebase_urls=[],
         )
     def _download_and_prepare(self, dl_manager):
+        self.toxic_classifier = pipeline(
+            "text-classification", model=self.config.model_name, top_k=99999, truncation=True
+        )
+    def _compute(self, data):
+        scores = toxicity(data, self.toxic_classifier, self.config.toxic_label)
+        if self.config.aggregation == "ratio":
+            return {"toxicity_ratio": sum(i >= self.config.threshold for i in scores) / len(scores)}
+        elif self.config.aggregation == "maximum":
             return {"max_toxicity": max(scores)}
         else:
             return {"toxicity": scores}