shainaraza
/

toxicity_debias_pipeline

Model card Files Files and versions Community

shainaraza committed on Apr 6, 2023

Commit

50e4378

1 Parent(s): 6bd32ad

Upload 3 files

Browse files

Files changed (3) hide show

README.md +31 -0
my_toxicity_debiaser.py +68 -0
requirements.txt +2 -0

README.md ADDED Viewed

	@@ -0,0 +1,31 @@

+# My Toxicity Debiaser Pipeline
+This custom pipeline debiases toxic text using a toxicity classifier and GPT-2.
+## Usage
+To use this pipeline, you first need to download the required models and tokenizers, and then import the `MyToxicityDebiaserPipeline` class:
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel, GPT2Tokenizer
+from my_toxicity_debiaser import MyToxicityDebiaserPipeline
+toxicity_model_name = "shainaraza/toxity_classify_debiaser"
+gpt_model_name = "gpt2"
+toxicity_tokenizer = AutoTokenizer.from_pretrained(toxicity_model_name)
+toxicity_model = AutoModelForSequenceClassification.from_pretrained(toxicity_model_name)
+gpt_tokenizer = GPT2Tokenizer.from_pretrained(gpt_model_name)
+gpt_model = GPT2LMHeadModel.from_pretrained(gpt_model_name)
+pipeline = MyToxicityDebiaserPipeline(
+    model=toxicity_model,
+    tokenizer=toxicity_tokenizer,
+    gpt_model=gpt_model,
+    gpt_tokenizer=gpt_tokenizer,
+)
+text = "Your example text here"
+result = pipeline(text)
+print(result)
+```

my_toxicity_debiaser.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import torch
class MyToxicityDebiaserPipeline(object):
    """Classify one text as toxic or non-toxic and rewrite toxic text.

    Calling an instance runs ``preprocess`` -> ``_forward`` -> ``postprocess``
    on a single string and returns a human-readable report string.  Label
    ``0`` is reported as non-toxic; label ``1`` triggers a rewrite request to
    the generative model.
    """

    def __init__(self, model, tokenizer, gpt_model, gpt_tokenizer, device=None, **kwargs):
        """Store the models and tokenizers and move both models to ``device``.

        Args:
            model: Sequence classifier.  It is called with ``input_ids`` and
                ``attention_mask`` and must return an object with ``.logits``.
            tokenizer: Tokenizer used to encode text for ``model``.
            gpt_model: Generative model exposing ``generate``.
            gpt_tokenizer: Tokenizer used to encode/decode for ``gpt_model``.
            device: Target device; CPU is used when ``None``.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        # Resolve the device first so the models and the tensors created
        # later are guaranteed to live on the same device.
        self.device = torch.device("cpu") if device is None else device
        self.model = model.to(self.device)
        self.tokenizer = tokenizer
        self.gpt_model = gpt_model.to(self.device)
        self.gpt_tokenizer = gpt_tokenizer

    def _forward(self, inputs):
        """Run the toxicity classifier on ``inputs["text"]``.

        Returns:
            A dict with the predicted ``label`` (int), the softmax
            ``probabilities`` (nested list), the encoded ``text_input_ids``
            and the untouched input ``text``.
        """
        text = inputs["text"]
        encoded = self.tokenizer(
            text, truncation=True, padding=True, return_tensors="pt"
        ).to(self.device)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            logits = self.model(
                input_ids=encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
            ).logits
        probs = torch.softmax(logits, dim=-1)
        # ``.item()`` requires exactly one prediction, i.e. a single text.
        label = torch.argmax(probs, dim=-1).item()
        return {
            "label": label,
            "probabilities": probs.tolist(),
            "text_input_ids": encoded["input_ids"],
            # Carry the raw text along so the report does not have to rebuild
            # it from token ids (which is lossy and may be truncated).
            "text": text,
        }

    def _sanitize_parameters(self, **kwargs):
        """Split call-time keyword arguments into three parameter dicts.

        All keyword arguments are returned in the first dict; the other two
        are always empty.
        """
        return kwargs, {}, {}

    def preprocess(self, inputs):
        """Wrap the raw input in the dict shape expected by ``_forward``."""
        return {"text": inputs}

    def postprocess(self, outputs):
        """Turn classifier outputs into the final report string.

        Args:
            outputs: Dict produced by ``_forward``.

        Returns:
            A fixed sentence for non-toxic text, or a report containing the
            original text and the generated rewrite for toxic text.

        Raises:
            ValueError: If the label is neither ``0`` nor ``1``.
        """
        label = outputs["label"]
        if label == 0:
            return "This comment is non-toxic."
        if label != 1:
            raise ValueError(
                f"Unexpected classifier label {label!r}; expected 0 (non-toxic) or 1 (toxic)."
            )

        text = outputs.get("text")
        if text is None:
            # Fallback for outputs that only carry token ids; skip special
            # tokens so markers such as padding do not leak into the prompt.
            text = self.tokenizer.decode(
                outputs["text_input_ids"][0], skip_special_tokens=True
            )

        debias_prompt = (
            "Remove the offensive words and biased tone and write the same "
            f"sentence nicely: {text}"
        )
        encoded_prompt = self.gpt_tokenizer(debias_prompt, return_tensors="pt").to(self.device)

        # Some tokenizers (GPT-2 among them) define no pad token; fall back to
        # EOS so ``generate`` receives a valid padding id.
        pad_token_id = self.gpt_tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = self.gpt_tokenizer.eos_token_id

        generated = self.gpt_model.generate(
            input_ids=encoded_prompt["input_ids"],
            attention_mask=encoded_prompt["attention_mask"],
            do_sample=True,
            # Budget counts only newly generated tokens, so a long prompt
            # cannot use up the whole allowance.
            max_new_tokens=100,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=pad_token_id,
            eos_token_id=self.gpt_tokenizer.eos_token_id,
        )

        # The returned sequence starts with the prompt tokens; keep only the
        # continuation so the instruction is not echoed as "debiased" text.
        prompt_length = encoded_prompt["input_ids"].shape[-1]
        generated_text = self.gpt_tokenizer.decode(
            generated[0][prompt_length:], skip_special_tokens=True
        ).strip()

        return (
            "This comment is toxic but has been debiased as follows:"
            f"\nOriginal text: {text}\nDebiased text: {generated_text}"
        )

    def __call__(self, inputs, *args, **kwargs):
        """Run the full pipeline on a single text and return the report string.

        Extra positional arguments are accepted and ignored; keyword
        arguments are routed through ``_sanitize_parameters``.
        """
        # ``_sanitize_parameters`` only takes keyword arguments, so positional
        # extras must not be forwarded to it.
        _, _, forward_kwargs = self._sanitize_parameters(**kwargs)
        model_inputs = self.preprocess(inputs)
        outputs = self._forward(model_inputs, **forward_kwargs)
        return self.postprocess(outputs)
# Create an instance of the custom pipeline.
# Guarded so that importing this module (for example
# ``from my_toxicity_debiaser import MyToxicityDebiaserPipeline``) neither
# fails on undefined names nor loads any model as a side effect.
if __name__ == "__main__":
    # Imported here because only this demo entry point needs transformers.
    from transformers import (
        AutoModelForSequenceClassification,
        AutoTokenizer,
        GPT2LMHeadModel,
        GPT2Tokenizer,
    )

    toxicity_model_name = "shainaraza/toxity_classify_debiaser"
    gpt_model_name = "gpt2"

    toxicity_tokenizer = AutoTokenizer.from_pretrained(toxicity_model_name)
    toxicity_model = AutoModelForSequenceClassification.from_pretrained(toxicity_model_name)
    gpt_tokenizer = GPT2Tokenizer.from_pretrained(gpt_model_name)
    gpt_model = GPT2LMHeadModel.from_pretrained(gpt_model_name)

    custom_pipeline = MyToxicityDebiaserPipeline(
        model=toxicity_model,
        tokenizer=toxicity_tokenizer,
        gpt_model=gpt_model,
        gpt_tokenizer=gpt_tokenizer,
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        binary_output=True,
    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ transformers
2	+ torch