toxic-comment-classifier_rlhf

Paused

App Files Files Community

JanviMl committed on Mar 25

Commit

c586725

verified ·

1 Parent(s): 9d22dca

Update classifier.py

Browse files

Files changed (1) hide show

classifier.py +18 -2

classifier.py CHANGED Viewed

@@ -1,8 +1,9 @@
 # classifier.py
 import torch
 from model_loader import classifier_model
 from paraphraser import paraphrase_comment
-from metrics import compute_semantic_similarity, compute_empathy_score, compute_rouge_score
 def classify_toxic_comment(comment):
     """
@@ -10,6 +11,9 @@ def classify_toxic_comment(comment):
     If toxic, paraphrase the comment, re-evaluate, and compute essential metrics.
     Returns the prediction label, confidence, color, toxicity score, bias score, paraphrased comment (if applicable), and its metrics.
     """
     if not comment.strip():
         return "Error: Please enter a comment.", None, None, None, None, None, None, None, None, None, None, None, None
@@ -18,6 +22,7 @@ def classify_toxic_comment(comment):
     tokenizer = classifier_model.tokenizer
     # Tokenize the input comment
     inputs = tokenizer(comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
     # Run inference
@@ -38,6 +43,7 @@ def classify_toxic_comment(comment):
     # Simulate Bias Score (placeholder)
     bias_score = 0.01 if label == "Non-Toxic" else 0.15
     bias_score = round(bias_score, 2)
     # If the comment is toxic, paraphrase it and compute essential metrics
     paraphrased_comment = None
@@ -48,13 +54,17 @@ def classify_toxic_comment(comment):
     paraphrased_bias_score = None
     semantic_similarity = None
     empathy_score = None
     rouge_scores = None
     if label == "Toxic":
         # Paraphrase the comment
         paraphrased_comment = paraphrase_comment(comment)
         # Re-evaluate the paraphrased comment
         paraphrased_inputs = tokenizer(paraphrased_comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
         with torch.no_grad():
             paraphrased_outputs = model(**paraphrased_inputs)
@@ -68,15 +78,21 @@ def classify_toxic_comment(comment):
         paraphrased_toxicity_score = round(paraphrased_toxicity_score, 2)
         paraphrased_bias_score = 0.01 if paraphrased_label == "Non-Toxic" else 0.15  # Placeholder
         paraphrased_bias_score = round(paraphrased_bias_score, 2)
         # Compute essential metrics
         semantic_similarity = compute_semantic_similarity(comment, paraphrased_comment)
         empathy_score = compute_empathy_score(paraphrased_comment)
         rouge_scores = compute_rouge_score(comment, paraphrased_comment)
     return (
         f"Prediction: {label}", confidence, label_color, toxicity_score, bias_score,
         paraphrased_comment, f"Prediction: {paraphrased_label}" if paraphrased_comment else None,
         paraphrased_confidence, paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score,
-        semantic_similarity, empathy_score, rouge_scores
     )

 # classifier.py
 import torch
+import time
 from model_loader import classifier_model
 from paraphraser import paraphrase_comment
+from metrics import compute_semantic_similarity, compute_empathy_score, compute_bleu_score, compute_rouge_score
 def classify_toxic_comment(comment):
     """
     If toxic, paraphrase the comment, re-evaluate, and compute essential metrics.
     Returns the prediction label, confidence, color, toxicity score, bias score, paraphrased comment (if applicable), and its metrics.
     """
+    start_total = time.time()
+    print("Starting classification...")
     if not comment.strip():
         return "Error: Please enter a comment.", None, None, None, None, None, None, None, None, None, None, None, None
     tokenizer = classifier_model.tokenizer
     # Tokenize the input comment
+    start_classification = time.time()
     inputs = tokenizer(comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
     # Run inference
     # Simulate Bias Score (placeholder)
     bias_score = 0.01 if label == "Non-Toxic" else 0.15
     bias_score = round(bias_score, 2)
+    print(f"Classification took {time.time() - start_classification:.2f} seconds")
     # If the comment is toxic, paraphrase it and compute essential metrics
     paraphrased_comment = None
     paraphrased_bias_score = None
     semantic_similarity = None
     empathy_score = None
+    bleu_score = None
     rouge_scores = None
     if label == "Toxic":
         # Paraphrase the comment
+        start_paraphrase = time.time()
         paraphrased_comment = paraphrase_comment(comment)
+        print(f"Paraphrasing took {time.time() - start_paraphrase:.2f} seconds")
         # Re-evaluate the paraphrased comment
+        start_reclassification = time.time()
         paraphrased_inputs = tokenizer(paraphrased_comment, return_tensors="pt", truncation=True, padding=True, max_length=512)
         with torch.no_grad():
             paraphrased_outputs = model(**paraphrased_inputs)
         paraphrased_toxicity_score = round(paraphrased_toxicity_score, 2)
         paraphrased_bias_score = 0.01 if paraphrased_label == "Non-Toxic" else 0.15  # Placeholder
         paraphrased_bias_score = round(paraphrased_bias_score, 2)
+        print(f"Reclassification of paraphrased comment took {time.time() - start_reclassification:.2f} seconds")
         # Compute essential metrics
+        start_metrics = time.time()
         semantic_similarity = compute_semantic_similarity(comment, paraphrased_comment)
         empathy_score = compute_empathy_score(paraphrased_comment)
+        bleu_score = compute_bleu_score(comment, paraphrased_comment)
         rouge_scores = compute_rouge_score(comment, paraphrased_comment)
+        print(f"Metrics computation took {time.time() - start_metrics:.2f} seconds")
+    print(f"Total processing time: {time.time() - start_total:.2f} seconds")
     return (
         f"Prediction: {label}", confidence, label_color, toxicity_score, bias_score,
         paraphrased_comment, f"Prediction: {paraphrased_label}" if paraphrased_comment else None,
         paraphrased_confidence, paraphrased_color, paraphrased_toxicity_score, paraphrased_bias_score,
+        semantic_similarity, empathy_score, bleu_score, rouge_scores
     )