import spacy
from textblob import TextBlob

nlp = spacy.load('en_core_web_md')

# Define biased terms
biased_terms = [
    "motherhood", "fatherhood", "stay-at-home parent", "single parent", "working mom", "working dad",
    "manpower", "man-hours", "man-made", "young", "old", "youthful", "elderly", "fresh", "experienced",
    "race", "ethnicity", "color", "origin", "black", "white", "Asian", "Hispanic", "minority", "majority",
    "rich", "poor", "wealthy", "impoverished", "disabled", "handicapped", "deaf", "blind", "religion",
    "Christian", "Muslim", "Hindu", "Jewish", "atheist", "LGBT", "gay", "lesbian", "transgender",
    "married", "single", "divorced", "widowed", "children", "family", "dumb", "intelligent", "beautiful", "ugly"
]

# Preprocess biased terms as spaCy docs
biased_docs = [nlp(term) for term in biased_terms]
def screen_for_bias(question, threshold=0.85):
    """
    Checks whether a question contains a biased term directly or a word highly
    similar to one. Returns (is_unbiased, max_similarity).
    """
    doc = nlp(question)
    max_similarity = 0
    for token in doc:
        for biased_doc in biased_docs:
            similarity = token.similarity(biased_doc)
            if similarity > max_similarity:
                max_similarity = similarity
            if similarity >= threshold:
                print(f"⚠️ Biased term detected: '{token.text}' similar to '{biased_doc.text}' ({similarity:.2f})")
                return False, max_similarity  # Mark as biased
    return True, max_similarity  # Unbiased, with similarity score
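
# Illustrative usage (hypothetical questions, not from the original script;
# exact scores depend on the en_core_web_md vectors): a question containing a
# listed term such as "children" is expected to trigger the warning and return
# (False, max_similarity), while a neutral question should return
# (True, max_similarity) with a lower score.
#   screen_for_bias("Do you have children at home?")      # expected to be flagged
#   screen_for_bias("Describe your debugging process.")   # expected to pass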
def screen_for_offensive_language(question):
    """
    Checks for offensive sentiment using TextBlob.
    """
    sentiment = TextBlob(question).sentiment
    if sentiment.polarity < -0.5:  # Negative sentiment threshold
        print(f"❌ Offensive sentiment detected: Polarity {sentiment.polarity}")
        return False, sentiment.polarity
    return True, sentiment.polarity
def combine_scores(score1, score2, bias_weight=0.7, sentiment_weight=0.3):
    """
    Combines bias similarity and sentiment polarity into a single score.
    """
    # Normalize sentiment score: (-1 to 1) → (0 to 1); positive → 0, negative → 1
    normalized_score2 = (1 - score2) / 2
    # Weighted average
    combined_score = (bias_weight * score1) + (sentiment_weight * normalized_score2)
    return combined_score
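
# Worked example with hypothetical values: a max bias similarity of 0.9 and a
# sentiment polarity of -0.6 give
#   normalized sentiment = (1 - (-0.6)) / 2 = 0.8
#   combined = 0.7 * 0.9 + 0.3 * 0.8 = 0.87
# which is above the 0.85 validity cutoff used in screen_questions below.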
def screen_questions(questions):
    """
    Screens a list of questions for bias and offensive language.
    Returns combined scores for each question.
    """
    valid_questions = []
    invalid_questions = []
    combined_scores = []
    for question in questions:
        is_unbiased, score1 = screen_for_bias(question)
        is_non_offensive, score2 = screen_for_offensive_language(question)
        combined_score = combine_scores(score1, score2)
        combined_scores.append(combined_score)
        if combined_score < 0.85:  # Threshold for validity
            valid_questions.append(question)
        else:
            invalid_questions.append(question)
    accuracy = len(valid_questions) / len(questions) if questions else 0
    return valid_questions, invalid_questions, accuracy, combined_scores
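
# Minimal usage sketch. The sample questions are illustrative assumptions, not
# part of the original script; outputs depend on the loaded model and TextBlob.
if __name__ == "__main__":
    sample_questions = [
        "Can you describe a challenging project you led?",
        "Do you plan to start a family soon?",
    ]
    valid, invalid, accuracy, scores = screen_questions(sample_questions)
    print(f"Valid questions: {valid}")
    print(f"Flagged questions: {invalid}")
    print(f"Share kept: {accuracy:.0%}, combined scores: {scores}")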