"""Gradio demo that screens a text through a chain of ethical-integrity checks."""

import re

import gradio as gr

from services.anonymizer import anonymize
from services.bad_words import identify_bad_words
from services.cola import classify_correctness
from services.hate_speech import classify_hatespeech

# Minimum classifier scores a text must reach to pass the respective check.
# Kept as two constants so each model can be tuned independently.
HATE_SPEECH_THRESHOLD = 0.8
CORRECTNESS_THRESHOLD = 0.8

examples = [
    'John is a son of a.',
    'John a wonderful item',
    'The dog is a bastard.',
    'The dog a cat.',
    'It was Peter Thiel from PayPal.',
]


def check_ethical(text: str) -> dict:
    """Run *text* through the check pipeline and report the first failure.

    The checks run in order and the function returns as soon as one fails:

    1. offensive-word lookup on the raw text,
    2. anonymization, followed by a hate-speech score on the anonymized text,
    3. a linguistic-correctness score on the anonymized text.

    Args:
        text: The raw input text to evaluate.

    Returns:
        A dict with the keys ``'status'`` and ``'data'``:

        * offensive words found: ``'data'`` is the result of
          ``identify_bad_words``;
        * hate-speech score below ``HATE_SPEECH_THRESHOLD``: ``'data'`` is
          that score as a float;
        * correctness score below ``CORRECTNESS_THRESHOLD``: ``'data'`` is
          the anonymized text;
        * otherwise ``'status'`` is ``"ethical"`` and ``'data'`` is the
          anonymized text.
    """
    # simple heuristic based on offensive word list by cmu.edu
    bad_words = identify_bad_words(text)
    # len() instead of plain truthiness: the helper's return type is not
    # visible from this file, so only rely on it being sized.
    if len(bad_words) > 0:
        return {'status': 'Input contains offensive words.', 'data': bad_words}

    # based on SpaCy NER recognition
    anonymized = anonymize(text)

    # based on DistilRoberta hosted on transformers
    # NOTE(review): a low score is treated as hate speech, so the classifier
    # presumably returns a "not hateful" score -- confirm against the service.
    nice = float(classify_hatespeech(anonymized))
    if nice < HATE_SPEECH_THRESHOLD:
        return {'status': 'Input contains hate speech.', 'data': nice}

    # based on DistilBert hosted on transformers
    # NOTE(review): a low score is treated as unacceptable, so this is
    # presumably an acceptability score -- confirm against the service.
    acceptability = float(classify_correctness(anonymized))
    if acceptability < CORRECTNESS_THRESHOLD:
        return {
            'status': 'Input is linguistically inacceptable.',
            'data': anonymized,
        }

    return {'status': "ethical", 'data': anonymized}


title = 'Ethical Integrity Demo'
description = (
    'This demo evaluates texts for polarization, manipulative language, '
    'anonymizes, and filters for hate speech.\n'
    ' It is an attempt for systems to ensure that only messages that meet '
    'the higher standards of ethical responsibility are used for further '
    'processing such as machine learning training.'
)

# `demo` stays a module-level name so tooling that imports this module can
# still find the interface object.
demo = gr.Interface(
    fn=check_ethical,
    inputs='text',
    outputs='text',
    examples=examples,
    title=title,
    description=description,
)

if __name__ == '__main__':
    # Only start the web server when run as a script; importing the module
    # (e.g. to reuse check_ethical) must not block on a running server.
    demo.launch()