| |
| """ |
| Simple baseline gender bias detector using basic keyword matching. |
| Used as sanity check baseline for comparison with rule-based approach. |
| """ |
|
|
| import csv |
| import re |
| from typing import List, Tuple, Dict |
|
|
| class SimpleBaselineDetector: |
| """Basic keyword-based bias detector as baseline""" |
| |
| def __init__(self): |
| |
| self.gendered_keywords = { |
| 'en': ['he', 'she', 'his', 'her', 'him', 'chairman', 'waitress', 'policeman', 'businessman'], |
| 'sw': ['yeye', 'mwanaume', 'mwanamke', 'baba', 'mama'], |
| 'ha': ['shi', 'ita', 'namiji', 'mace'], |
| 'ig': ['nwoke', 'nwanyi', 'ya', 'o'], |
| 'yo': ['ọkunrin', 'obinrin', 'o', 'oun'] |
| } |
| |
| def detect_bias(self, text: str, language: str) -> bool: |
| """Simple detection: return True if any gendered keyword found""" |
| if language not in self.gendered_keywords: |
| return False |
| |
| text_lower = text.lower() |
| keywords = self.gendered_keywords[language] |
| |
| for keyword in keywords: |
| if re.search(r'\b' + keyword + r'\b', text_lower): |
| return True |
| return False |
|
|
| def evaluate_baseline(ground_truth_file: str, language: str) -> Dict: |
| """Evaluate baseline detector on ground truth""" |
| detector = SimpleBaselineDetector() |
| |
| tp = fp = tn = fn = 0 |
| |
| with open(ground_truth_file, 'r', encoding='utf-8') as f: |
| reader = csv.DictReader(f) |
| for row in reader: |
| text = row['text'].strip('"') |
| actual_bias = row['has_bias'] == 'true' |
| predicted_bias = detector.detect_bias(text, language) |
| |
| if actual_bias and predicted_bias: |
| tp += 1 |
| elif not actual_bias and predicted_bias: |
| fp += 1 |
| elif not actual_bias and not predicted_bias: |
| tn += 1 |
| else: |
| fn += 1 |
| |
| precision = tp / (tp + fp) if (tp + fp) > 0 else 0 |
| recall = tp / (tp + fn) if (tp + fn) > 0 else 0 |
| f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0 |
| |
| return { |
| 'language': language, |
| 'precision': precision, |
| 'recall': recall, |
| 'f1': f1, |
| 'tp': tp, |
| 'fp': fp, |
| 'tn': tn, |
| 'fn': fn |
| } |
|
|
| if __name__ == "__main__": |
| languages = ['en', 'sw', 'ha', 'ig', 'yo'] |
| |
| print("Baseline Evaluation Results:") |
| print("=" * 50) |
| |
| for lang in languages: |
| try: |
| results = evaluate_baseline(f'ground_truth_{lang}.csv', lang) |
| print(f"{lang.upper()}: F1={results['f1']:.3f}, P={results['precision']:.3f}, R={results['recall']:.3f}") |
| except FileNotFoundError: |
| print(f"{lang.upper()}: File not found") |