import gradio as gr
import fasttext

from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

import random
import numpy as np
import pandas as pd
import torch


# Class index <-> sentiment label. Both mappings are handed to
# from_pretrained() for the English and the Korean classifier below.
id2label = dict(enumerate(("NEGATIVE", "POSITIVE")))
# Inverse mapping, derived so the two tables cannot drift apart.
label2id = {name: index for index, name in id2label.items()}


# Heading and introductory text passed to gr.Interface (title= / description=).
title = "Movie Review Score Discriminator"
# NOTE: the backslash continuations below sit inside the string literal, so
# the newline is dropped but the leading spaces of each continued source line
# become part of the runtime text.
description = "It is a program that classifies whether it is positive or negative by entering movie reviews.  \
                You can choose between the Korean version and the English version.  \
                It also provides a version called Any, which determines whether it is Korean or English and predicts it."


class LanguageIdentification:
    """Thin wrapper around a fastText language-identification model."""

    def __init__(self, model_path="./lid.176.ftz"):
        """Load the fastText model stored at *model_path*.

        Args:
            model_path: Location of the model file. Defaults to the
                ``lid.176.ftz`` file in the current working directory.
        """
        self.model = fasttext.load_model(model_path)

    def predict_lang(self, text):
        """Return the model's top-2 language predictions for *text*.

        fastText's ``predict`` handles one line at a time and raises
        ``ValueError`` when the input contains a newline character, so line
        breaks (e.g. from a multi-line review) are collapsed to single
        spaces before the text is handed to the model.

        Args:
            text: The text whose language should be identified.

        Returns:
            Whatever ``self.model.predict(text, k=2)`` returns.
        """
        single_line = " ".join(text.splitlines())
        return self.model.predict(single_line, k=2)  # top 2 matching languages

# Module-level language identifier shared by every request; constructing it
# loads the fastText model once, at import time.
LANGUAGE = LanguageIdentification()


def tokenized_data(tokenizer, inputs):
    """Encode one text as a single-item batch of PyTorch tensors.

    Args:
        tokenizer: Object exposing ``batch_encode_plus`` (the callers in
            this file pass tokenizers created by ``AutoTokenizer``).
        inputs: The text to encode; it is wrapped in a one-element list.

    Returns:
        The result of ``tokenizer.batch_encode_plus`` called with
        ``return_tensors="pt"``, padding to ``max_length=64`` and
        truncation enabled.
    """
    encode_options = {
        "return_tensors": "pt",
        "padding": "max_length",
        "max_length": 64,
        "truncation": True,
    }
    single_item_batch = [inputs]
    return tokenizer.batch_encode_plus(single_item_batch, **encode_options)


# Build the example rows offered in the UI: 15 seeded random picks from
# examples.csv, each contributing one ['Eng', ...] and one ['Kor', ...] row
# (column 0 and column 1 of the same CSV row, respectively).
examples = []
df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
random.seed(100)  # fixed seed so the same examples appear on every start
# random.randint is inclusive on both ends; clamp the upper bound to the last
# available row so a CSV with fewer than 51 rows cannot raise IndexError.
max_row = min(50, len(df) - 1)
if max_row >= 0:  # an empty CSV simply yields no examples
    for _ in range(15):
        idx = random.randint(0, max_row)
        examples.extend([['Eng', df.iloc[idx, 0]], ['Kor', df.iloc[idx, 1]]])


# English classifier: roberta-base tokenizer/architecture, with weights taken
# from a local checkpoint file named "<model name>-<step>.pt"
# (eng_step is presumably the training step of that checkpoint).
eng_model_name = "roberta-base"
eng_step = 1900
eng_tokenizer = AutoTokenizer.from_pretrained(eng_model_name)
eng_file_name = "{}-{}.pt".format(eng_model_name, eng_step)
# map_location="cpu": inference in this file runs on CPU tensors, and this
# keeps a checkpoint that was saved from a GPU loadable on a CPU-only host.
eng_state_dict = torch.load(eng_file_name, map_location="cpu")
eng_model = AutoModelForSequenceClassification.from_pretrained(
    eng_model_name, num_labels=2, id2label=id2label, label2id=label2id,
    state_dict=eng_state_dict
)


# Korean classifier: klue/roberta-small tokenizer/architecture, with weights
# taken from a local checkpoint file named "<model name>-<step>.pt"
# (kor_step is presumably the training step of that checkpoint).
kor_model_name = "klue/roberta-small"
kor_step = 2400
kor_tokenizer = AutoTokenizer.from_pretrained(kor_model_name)
# The hub name contains '/', which cannot appear in a file name.
kor_file_name = "{}-{}.pt".format(kor_model_name.replace('/', '_'), kor_step)
# map_location="cpu": inference in this file runs on CPU tensors, and this
# keeps a checkpoint that was saved from a GPU loadable on a CPU-only host.
kor_state_dict = torch.load(kor_file_name, map_location="cpu")
kor_model = AutoModelForSequenceClassification.from_pretrained(
    kor_model_name, num_labels=2, id2label=id2label, label2id=label2id,
    state_dict=kor_state_dict
)


def builder(lang, text):
    """Classify a movie review and return the probability of each label.

    Args:
        lang: 'Eng', 'Kor' or 'Any'. With 'Any' the language is detected via
            ``LANGUAGE.predict_lang``: the Korean model is used when the top
            label is '__label__ko', the English model for every other
            detected language.
        text: The review text to classify.

    Returns:
        dict with the keys ``id2label[1]`` ("POSITIVE") and ``id2label[0]``
        ("NEGATIVE"), each mapped to its softmax probability as a float.

    Raises:
        ValueError: If *lang* is none of 'Any', 'Eng' or 'Kor' (previously
            this surfaced as an UnboundLocalError further down).
    """
    if lang == 'Any':
        pred = LANGUAGE.predict_lang(text)
        # Anything not detected as Korean falls back to the English model.
        lang = 'Kor' if pred[0][0] == '__label__ko' else 'Eng'

    if lang == 'Eng':
        model, tokenizer = eng_model, eng_tokenizer
    elif lang == 'Kor':
        model, tokenizer = kor_model, kor_tokenizer
    else:
        raise ValueError(
            "lang must be 'Any', 'Eng' or 'Kor', got {!r}".format(lang))

    inputs = tokenized_data(tokenizer, text)

    # Inference only: switch to eval mode and skip gradient tracking.
    model.eval()
    with torch.no_grad():
        logits = model(input_ids=inputs['input_ids'],
                       attention_mask=inputs['attention_mask']).logits

    # The batch holds a single review, so take row 0 of the class
    # probabilities.
    probs = torch.softmax(logits, dim=1)[0]

    return {id2label[1]: probs[1].item(), id2label[0]: probs[0].item()}


# Web UI: a language dropdown and a free-text box feed builder(); its
# label -> probability dict is rendered by a Label component.
# gr.Dropdown replaces the deprecated gr.inputs.Dropdown so the inputs use the
# same top-level component namespace as the gr.Label output.
demo = gr.Interface(
    builder,
    inputs=[gr.Dropdown(['Any', 'Eng', 'Kor']), "text"],
    outputs=gr.Label(num_top_classes=2, label='Result', color='CadetBlue'),
    title=title,
    description=description,
    examples=examples,
)


# demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text", 
#                          title=title, theme="peach",
#                          allow_flagging="auto",
#                          description=description, examples=examples)

if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.
    demo.launch()