import gradio as gr
import re
 
from services.anonymizer import anonymize
from services.hate_speech import classify_hatespeech
from services.cola import classify_correctness
from services.bad_words import identify_bad_words

# Sample inputs passed to the interface as its `examples`.
examples = [
    'John is a son of a.',
    'John a wonderful item',
    'The dog is a bastard.',
    'The dog a cat.',
    'It was Peter Thiel from PayPal.',
]

def check_ethical(text):
    """Screen `text` in stages and report the first stage it fails.

    The raw text is checked for offensive words first. If none are found,
    the text is anonymized, and the anonymized version is scored for hate
    speech and then for linguistic acceptability; a score below 0.8 fails
    the respective stage. Later stages are not run once a stage fails.

    Returns:
        A dict with a 'status' message and a 'data' payload. 'data' is the
        result of the offensive word lookup, the hate speech score, or the
        anonymized text (for both the linguistic failure and the passing
        case).
    """
    # shared cut-off for both classifier scores
    min_score = .8

    def report(status, data):
        # every outcome has the same two-key shape
        return {'status': status, 'data': data}

    # offensive word list by cmu.edu, applied as a simple heuristic
    offensive_hits = identify_bad_words(text)
    if len(offensive_hits) > 0:
        return report('Input contains offensive words.', offensive_hits)

    # anonymization based on SpaCy NER recognition
    scrubbed = anonymize(text)

    # DistilRoberta hosted on transformers; a low score is treated as hate speech
    hate_score = float(classify_hatespeech(scrubbed))
    if hate_score < min_score:
        return report('Input contains hate speech.', hate_score)

    # DistilBert hosted on transformers; a low score is treated as unacceptable
    acceptability = float(classify_correctness(scrubbed))
    if acceptability < min_score:
        return report('Input is linguistically inacceptable.', scrubbed)

    return report("ethical", scrubbed)
        

# Title and description handed to the interface below.
title = 'Ethical Integrity Demo'
description = (
    'This demo evaluates texts for polarization, manipulative language, '
    'anonymizes, and filters for hate speech.\n\n'
    'It is an attempt for systems to ensure that only messages that meet '
    'the higher standards of ethical responsibility are used for further '
    'processing such as machine learning training.'
)

# One text input goes to check_ethical; its result is rendered as text.
demo = gr.Interface(
    fn=check_ethical,
    inputs='text',
    outputs='text',
    title=title,
    description=description,
    examples=examples,
)
demo.launch()