import json
import os
import re
import time
from string import punctuation

import emoji
import gradio as gr
import numpy as np
import requests
from thefuzz import fuzz, process


# Hosted inference endpoint of the profanity-detection model.
API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"

# SECURITY: prefer supplying the token through the HF_API_TOKEN environment
# variable. The literal below is kept only so existing deployments keep
# working; it is committed to source control and should be revoked/rotated.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or "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"
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

def query(text, timeout=30):
    """Send *text* to the hosted model and return the decoded JSON reply.

    Args:
        text: The (already preprocessed) tweet to classify.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely, so a stalled connection would
            hang the calling UI handler; with it, ``requests`` raises a
            timeout exception instead.

    Returns:
        The parsed JSON body of the response, returned as-is.
    """
    payload = {"inputs": text}
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()

def read_text(filename, filetype='txt'):
    """Load a word resource from ``filename`` plus the given extension.

    Args:
        filename: Path of the resource *without* its extension.
        filetype: ``'txt'`` (one entry per line) or ``'json'``.

    Returns:
        * ``'txt'``: list of the distinct lines, trailing whitespace removed,
          in the order they first appear in the file.
        * ``'json'``: the parsed JSON document, unchanged.
        * any other ``filetype``: an empty list.
    """
    if filetype == 'txt':
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding.
        with open(filename + '.txt', encoding='utf-8') as file:
            # dict.fromkeys drops duplicates but keeps file order, so the
            # list is identical on every run (set ordering of strings is not).
            return list(dict.fromkeys(line.rstrip() for line in file))

    if filetype == 'json':
        with open(filename + '.json', encoding='utf-8') as json_file:
            return json.load(json_file)

    return []


# Lookup resources, loaded once at import time from files resolved relative
# to the current working directory.

# contractions.json: mapping whose items preprocess() applies as
# (pattern, replacement) pairs.
contractions = read_text('contractions', 'json')
# lookup_words.txt: candidate words that fuzzy_lookup() scores each token against.
lookup_words = read_text('lookup_words')
# obj_pronouns.txt: pronouns that fuzzy_lookup() splits off the end of a matched word.
obj_pronouns = read_text('obj_pronouns')
# profanities.json: mapping of a canonical profanity to a collection of its
# variant spellings (fuzzy_lookup() uses both the keys and the values).
profanities = read_text('profanities', 'json')

# NOTE(review): never read or assigned anywhere else in the visible code.
loading_countdown = 0

def fuzzy_lookup(tweet):
    """Rewrite obfuscated or misspelled profanities in *tweet* to a known form.

    Every whitespace-separated token is compared against ``lookup_words``
    with ``fuzz.ratio``. When the best-scoring lookup word (score >= 70) is a
    known profanity, the token is recorded and later replaced in the tweet.

    Args:
        tweet: Text to normalise.

    Returns:
        A ``(tweet, matched_profanity)`` tuple: the rewritten text and a dict
        mapping each matched token (outer punctuation stripped) to the text
        it was replaced with.
    """
    # Every known spelling: canonical profanities (keys) plus their variants.
    # A set gives constant-time membership tests and also works when
    # `profanities` is empty.
    known_profanities = set(profanities)
    for variants in profanities.values():
        known_profanities.update(variants)

    matched_profanity = {}

    for token in tweet.split():
        word = token.strip(punctuation)
        # Score only ASCII letters, digits and '@'; the unfiltered `word`
        # stays the dict key because that is the text present in the tweet.
        processed_word = re.sub("[^a-zA-Z0-9@]", "", word)

        if len(processed_word) < 4:
            continue

        # Keep the first lookup word reaching the highest score >= 70.
        best_score = 0
        best_match = None
        for lookup_word in lookup_words:
            score = fuzz.ratio(processed_word, lookup_word)
            if score >= 70 and score > best_score:
                best_score, best_match = score, lookup_word

        # Only the single best candidate counts: if it is not a profanity the
        # token is left alone, even if a profanity scored lower.
        if best_match is not None and best_match in known_profanities:
            matched_profanity[word] = best_match

    # Detach an object pronoun glued to the profanity: split the token on the
    # profanity's last two characters and check what follows them.
    for word, profanity in list(matched_profanity.items()):
        word_split = word.split(profanity[-2:])
        if len(word_split) > 1 and word_split[-1] in obj_pronouns:
            matched_profanity[word] = profanity + ' ' + word_split[-1]

    # Replace each profanities by fuzzy lookup result
    # NOTE(review): str.replace works on substrings, so a matched token that
    # also occurs inside a longer word is rewritten there too. Left unchanged
    # to keep the preprocessing output stable.
    for word, profanity in matched_profanity.items():
        tweet = tweet.replace(word, profanity)

    # Collapse every listed variant spelling into its canonical profanity.
    for profanity, prof_variations in profanities.items():
        for prof_variant in prof_variations:
            tweet = tweet.replace(prof_variant, profanity)

    return tweet, matched_profanity


def preprocess(tweet):
    """Normalise a raw tweet before it is sent to the classifier.

    Steps, in order: lowercase, strip emoji, collapse characters repeated
    three or more times, drop link tokens, unify laugh tokens to 'haha',
    run ``fuzzy_lookup`` and finally expand contractions (skipped when the
    result is a single word).

    Args:
        tweet: Raw user text.

    Returns:
        A ``(preprocessed_tweet, matches)`` tuple, where ``matches`` is the
        profanity mapping returned by ``fuzzy_lookup``.
    """
    # Substrings that mark a token as laughter.
    laugh_texts = ('hahaha', 'wahaha', 'hahaa', 'ahha', 'haaha', 'hahah', 'ahah', 'hha')

    tweet = tweet.lower()
    tweet = emoji.replace_emoji(tweet, replace='')

    # Elongated words conversion: any character occurring 3+ times in a row
    # is reduced to a single occurrence.
    tweet = re.sub(r'(.)\1{2,}', r'\1', tweet)

    cleaned_words = []
    for word in tweet.split():
        # Remove links. Checked first and skipped outright so a URL that
        # happens to contain a laugh substring is not kept as 'haha'.
        if 'http' in word:
            continue

        # Unify laugh texts format to 'haha'
        if any(laugh in word for laugh in laugh_texts):
            word = 'haha'

        cleaned_words.append(word)

    # Combine list of words back to sentence
    preprocessed_tweet = ' '.join(cleaned_words)

    # Fuzzy Lookup
    preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)

    if len(preprocessed_tweet.split()) == 1:
        return preprocessed_tweet, matches

    # Expand Contractions. Keys are escaped so they match literally, and the
    # replacement is supplied through a function so backslashes in it are not
    # interpreted as regex template escapes.
    for contraction, expansion in contractions.items():
        preprocessed_tweet = re.sub(
            rf"\b{re.escape(contraction)}\b",
            lambda _match, expansion=expansion: expansion,
            preprocessed_tweet,
        )

    return preprocessed_tweet, matches


def predict(tweet):
    """Classify *tweet* and return the value shown in the prediction label.

    Args:
        tweet: Raw text entered by the user.

    Returns:
        * a "Loading Model ..." message when the API reply is a dict that
          carries ``estimated_time``;
        * an "API Error: ..." message for any other dict reply;
        * ``"No Profanity"`` when preprocessing matched no profanity;
        * otherwise a dict built from the first element of the API reply,
          mapping each item's first value to its second.
    """
    preprocessed_tweet, matched_profanity = preprocess(tweet)

    # The request is made before the profanity check, so a dict (status)
    # reply is reported even for input without any matched profanity.
    prediction = query(preprocessed_tweet)

    if isinstance(prediction, dict):
        if 'estimated_time' in prediction:
            loading_time = prediction['estimated_time']
            return f"Loading Model (Estimated Time: {loading_time} Seconds)"
        # Any other dict has no estimated time to show; surface its content
        # instead of failing with a KeyError.
        return f"API Error: {prediction.get('error', prediction)}"

    if not matched_profanity:
        return "No Profanity"

    # Each item is presumably {'label': ..., 'score': ...} (TODO confirm);
    # as before, the first value becomes the key and the second the value.
    prediction = dict(tuple(item.values()) for item in prediction[0])

    print("\nTWEET:", tweet)
    print("DETECTED PROFANITY:", matched_profanity)
    print("LABELS:", prediction, "\n")

    return prediction


# Sample tweets offered as one-click inputs in the UI.
example_tweets = [
    'Tangina mo naman sobrang yabang mo gago!!😠😤 @davidrafael',
    'Napakainit ngayong araw pakshet namaaan!!',
    'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
    'Bobo ka ba? napakadali lang nyan eh... 🤡',
    'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂',
]

# Multi-line text box whose content is passed to predict().
input_box = gr.components.Textbox(
    lines=5,
    placeholder='Enter your input here',
    label='INPUT',
)

# Label widget showing at most the two top classes returned by predict().
output_label = gr.components.Label(num_top_classes=2, label="PREDICTION")

demo = gr.Interface(
    fn=predict,
    inputs=[input_box],
    outputs=[output_label],
    examples=example_tweets,
)

# Start the web app as soon as the script runs.
demo.launch(debug=True)