LM-Explanation-Demo

Runtime error

File size: 14,802 Bytes

import functools
import random
import time

import gradio as gr
import numpy as np
import pandas as pd
import requests


def read1(lang, num_selected_former):
    """Pick a random review sample that is suitable for display.

    The data file stores one sample per pair of lines: a dict literal with a
    'text' entry, followed by a list of ``(token, score)`` pairs.  A sample is
    suitable when its text contains neither a backslash nor '//' and when it
    has more than ``min_len`` tokens after dropping the first and last
    interpretation entries (presumably special tokens -- TODO confirm).

    Args:
        lang: 'en' selects the English data file, anything else the Dutch one.
        num_selected_former: index of the sample shown previously; it is only
            returned again when no other suitable sample exists.

    Returns:
        A tuple ``(res, lang, index_selected)`` where ``res`` is
        ``{"original": <text>, "interpretation": [(token, 0), ...]}``.

    Raises:
        ValueError: if the data file contains no suitable sample.
    """
    fname = 'data1_en.txt' if lang in ['en'] else 'data1_nl_10.txt'
    min_len = 4 if lang == 'en' else 2

    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    # Integer division: random functions reject float bounds on Python 3.12+.
    candidates = list(range(len(content) // 2))
    random.shuffle(candidates)
    # Stable sort keeps the shuffled order but tries the previous sample last.
    candidates.sort(key=lambda idx: idx == num_selected_former)

    for index_selected in candidates:
        # NOTE(security): eval() executes whatever is in the data file; only
        # use trusted data (ast.literal_eval would be safer if the file holds
        # plain literals only).
        text = eval(content[index_selected * 2])
        if '\\' in text['text'] or '//' in text['text']:
            continue
        # Text and interpretation are always read from the same sample, so the
        # returned tokens can never belong to a different review.
        interpretation = eval(content[index_selected * 2 + 1])
        tokens = [pair[0] for pair in interpretation][1:-1]
        if len(tokens) <= min_len:
            continue
        res = {
            "original": text['text'],
            "interpretation": [(token, 0) for token in tokens],
        }
        return res, lang, index_selected

    raise ValueError("no suitable sample found in " + fname)

def read1_written(lang):
    """Return the text of a random review usable as a writing example.

    Every even line of the data file is evaluated as a dict with a 'text'
    entry.  A text is usable when it has more than ``min_len``
    space-separated words (4 for English, 2 otherwise) and contains neither
    a backslash nor '//'.

    Args:
        lang: 'en' selects the English data file, anything else the Dutch one.

    Returns:
        The selected review text.

    Raises:
        ValueError: if the data file contains no usable text.
    """
    fname = 'data1_en.txt' if lang in ['en'] else 'data1_nl_10.txt'
    min_len = 4 if lang == 'en' else 2

    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    # Integer division: random functions reject float bounds on Python 3.12+.
    candidates = list(range(len(content) // 2))
    # Visiting a shuffled list once keeps the choice random and guarantees
    # termination even when no sample passes the filter.
    random.shuffle(candidates)

    for index_selected in candidates:
        # NOTE(security): eval() executes whatever is in the data file; only
        # use trusted data.
        review = eval(content[index_selected * 2])['text']
        if len(review.split(' ')) <= min_len:
            continue
        if '\\' in review or '//' in review:
            continue
        return review

    raise ValueError("no usable text found in " + fname)
    
@functools.lru_cache(maxsize=1)
def _get_star_rating_classifier():
    """Build the multilingual star-rating pipeline once and reuse it."""
    # Imported lazily so that importing this module stays cheap.
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    from transformers import pipeline

    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)


def func1(lang_selected, num_selected, human_predict, num1, num2, user_important):
    """Play one human-vs-AI round on a stored review and update the scores.

    Args:
        lang_selected: 'en' selects the English data file, anything else the
            Dutch one.
        num_selected: index of the sample inside the data file.
        human_predict: the user's guess (compared against 0-100 scores).
        num1: current human score.
        num2: current AI score.
        user_important: text describing what the user focused on ("" if
            nothing).

    Returns:
        A tuple ``(ai_predict, chatbot, num1, num2, tot_scores, figure)``:
        the (possibly adjusted) AI score, a one-element list holding the
        ``(verdict, follow-up)`` chat pair, the updated human and AI scores,
        a markdown/HTML score line and the bar-plot update.
    """
    # num1: Human score; num2: AI score
    is_english = lang_selected in ['en']
    fname = 'data1_en.txt' if is_english else 'data1_nl_10.txt'
    # Same explicit encoding as read1, which reads the same files.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()
    # NOTE(security): eval() executes whatever is in the data file; only use
    # trusted data.
    text = eval(content[int(num_selected*2)])
    # Stored labels are scaled to the 0-100 range used by the UI (presumably
    # 0-4 stars for English and 0/1 for Dutch -- TODO confirm).
    golden_label = text['label'] * (25 if is_english else 100)

    # The hosted Inference API variant that used to live here as a dead
    # string literal embedded a bearer token; credentials must come from the
    # environment, never from source code.
    classifier = _get_star_rating_classifier()
    output = classifier([text['text']])

    star2num = {
        "5 stars": 100,
        "4 stars": 75,
        "3 stars": 50,
        "2 stars": 25,
        "1 star": 0,
    }
    print(output)
    ai_predict = star2num[output[0]['label']]

    user_select = "You focused on "
    if user_important == "":
        user_select += "nothing. Interesting! "
    else:
        user_select += user_important
        user_select += ". "
    user_select += "Wanna see how the AI made the guess? Click here. ⬅️"

    # Each branch settles the displayed "correct answer", the verdict text
    # and the score changes; the chat message is assembled once at the end.
    if is_english:
        if ai_predict == golden_label:
            if abs(human_predict - golden_label) < 12.5:  # Both correct
                golden_label = int((human_predict + ai_predict) / 2)
                verdict = ". Congratulations! 🎉 Both of you get the correct answer!"
                num1 += 1
                num2 += 1
            else:
                # Jitter the shown answer so it is not an exact multiple of 25.
                golden_label += random.randint(-2, 2)
                while golden_label > 100 or golden_label < 0 or golden_label % 25 == 0:
                    golden_label += random.randint(-2, 2)
                verdict = ". Sorry.. AI wins in this round."
                num2 += 1
        else:
            if abs(human_predict - golden_label) < abs(ai_predict - golden_label):
                if abs(human_predict - golden_label) < 12.5:
                    golden_label = int((golden_label + human_predict) / 2)
                    verdict = ". Great! 🎉 You are closer to the answer and better than AI!"
                    num1 += 1
                else:
                    verdict = ". Both wrong... Maybe next time you'll win!"
            else:
                verdict = ". Sorry.. No one gets the correct answer. But nice try! 😉"
    else:
        if golden_label == 100:
            if ai_predict > 50 and human_predict > 50:
                golden_label = int((human_predict + ai_predict)/2) + random.randint(-10, 10)
                while golden_label > 100:
                    golden_label = int((human_predict + ai_predict)/2) + random.randint(-10, 10)
                ai_predict = int((golden_label + ai_predict) / 2)
                verdict = ". Congratulations! 🎉 Both of you get the correct answer!"
                num1 += 1
                num2 += 1
            elif ai_predict > 50 and human_predict <= 50:
                golden_label -= random.randint(0, 10)
                ai_predict = 90 + random.randint(-5, 5)
                verdict = ". Sorry.. AI wins in this round."
                num2 += 1
            elif ai_predict <= 50 and human_predict > 50:
                golden_label = human_predict + random.randint(-4, 4)
                while golden_label > 100:
                    golden_label = human_predict + random.randint(-4, 4)
                verdict = ". Great! 🎉 You are close to the answer and better than AI!"
                num1 += 1
            else:
                verdict = ". Sorry... No one gets the correct answer. But nice try! 😉"
        else:
            if ai_predict < 50 and human_predict < 50:
                golden_label = int((human_predict + ai_predict)/2) + random.randint(-10, 10)
                while golden_label < 0:
                    golden_label = int((human_predict + ai_predict)/2) + random.randint(-10, 10)
                ai_predict = int((golden_label + ai_predict) / 2)
                verdict = ". Congratulations! 🎉 Both of you get the correct answer!"
                num1 += 1
                num2 += 1
            elif ai_predict < 50 and human_predict >= 50:
                golden_label += random.randint(0, 10)
                ai_predict = 10 + random.randint(-5, 5)
                verdict = ". Sorry.. AI wins in this round."
                num2 += 1
            elif ai_predict >= 50 and human_predict < 50:
                golden_label = human_predict + random.randint(-4, 4)
                while golden_label < 0:
                    golden_label = human_predict + random.randint(-4, 4)
                verdict = ". Great! 🎉 You are close to the answer and better than AI!"
                num1 += 1
            else:
                verdict = ". Sorry... No one gets the correct answer. But nice try! 😉"

    chatbot = [("The correct answer is " + str(golden_label) + verdict, user_select)]

    data = pd.DataFrame(
        {
            "Role": ["AI 🤖", "HUMAN 👨👩"],
            "Scores": [num2, num1],
        }
    )
    tot_scores = ''' ### <p style="text-align: center;"> Machine &ensp; ''' + str(int(num2)) + ''' &ensp; VS &ensp; ''' + str(int(num1)) + ''' &ensp; Human </p>'''

    # Leave some headroom above the leading score, rounded up to a multiple of 10.
    num_tmp = max(num1, num2)
    y_lim_upper = (int((num_tmp + 3)/10)+1) * 10
    figure = gr.BarPlot.update(
            data,
            x="Role",
            y="Scores",
            color="Role",
            vertical=False,
            y_lim=[0,y_lim_upper],
            color_legend_position='none', 
            height=250, 
            width=500,
            show_label=False,
            container=False,
        )
    return ai_predict, chatbot, num1, num2, tot_scores, figure

def interpre1(lang_selected, num_selected):
    """Load the stored explanation for one sample.

    Args:
        lang_selected: 'en' selects the English data file, anything else the
            Dutch one.
        num_selected: index of the sample; its text is on line
            ``2 * num_selected`` of the file and its interpretation on the
            following line.

    Returns:
        ``{"original": <review text>, "interpretation": <stored list>}``.
    """
    fname = 'data1_en.txt' if lang_selected in ['en'] else 'data1_nl_10.txt'

    # Explicit UTF-8, matching read1, so non-ASCII reviews decode the same
    # way regardless of the platform's default encoding.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    # NOTE(security): eval() executes whatever is in the data file; only use
    # trusted data.
    text = eval(content[int(num_selected*2)])
    interpretation = eval(content[int(num_selected*2+1)])

    print(interpretation)

    return {"original": text['text'], "interpretation": interpretation}

def change_lang(choice):
    """Return a hidden-textbox update holding the chosen language name.

    "English" is kept as is; every other choice is treated as 'Dutch'.
    """
    language = 'English' if choice == "English" else 'Dutch'
    return gr.Textbox.update(language, visible=False)

    
@functools.lru_cache(maxsize=1)
def _get_written_star_classifier():
    """Build the multilingual star-rating pipeline once and reuse it."""
    # Imported lazily so that importing this module stays cheap.
    from transformers import pipeline

    return pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")


@functools.lru_cache(maxsize=None)
def _get_written_explanation_classifier(model_name):
    """Build (once per model) the all-scores classifier that SHAP explains."""
    from transformers import pipeline

    return pipeline("text-classification", model=model_name, return_all_scores=True)


def func1_written(text_written, human_predict, lang_written):
    """Score a user-written review and explain the prediction with SHAP.

    Args:
        text_written: the review text entered by the user.
        human_predict: the user's own score (compared against 0-100 scores).
        lang_written: "Dutch" selects the Dutch explanation model; anything
            else selects the English one.

    Returns:
        A tuple ``(res, ai_predict, chatbot)``: ``res`` is
        ``{"original": text_written, "interpretation": [(token, value), ...]}``
        built from the SHAP values of output index 1 (presumably the
        positive class -- TODO confirm), ``ai_predict`` is the AI score and
        ``chatbot`` is a one-element list with the chat message pair.
    """
    # The hosted Inference API variant that used to live here as a dead
    # string literal embedded a bearer token; credentials must come from the
    # environment, never from source code.
    classifier = _get_written_star_classifier()
    output = classifier([text_written])

    star2num = {
        "5 stars": 100,
        "4 stars": 75,
        "3 stars": 50,
        "2 stars": 25,
        "1 star": 0,
    }
    print(output)
    ai_predict = star2num[output[0]['label']]

    chatbot = []
    if abs(ai_predict - human_predict) <= 12.5:
        chatbot.append(("AI gives it a close score! 🎉", "⬅️ Feel free to try another one! ⬅️"))
    else:
        # Jitter the shown score so it is not an exact multiple of 25.
        ai_predict += random.randint(-2, 2)
        while ai_predict > 100 or ai_predict < 0 or ai_predict % 25 == 0:
            ai_predict += random.randint(-2, 2)
        chatbot.append(("AI thinks in a different way from human. 😉", "⬅️ Feel free to try another one! ⬅️"))

    import shap

    if lang_written == "Dutch":
        explanation_model = 'DTAI-KULeuven/robbert-v2-dutch-sentiment'
    else:
        explanation_model = 'distilbert-base-uncased-finetuned-sst-2-english'
    sentiment_classifier = _get_written_explanation_classifier(explanation_model)

    explainer = shap.Explainer(sentiment_classifier)

    shap_values = explainer([text_written])
    interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))

    res = {"original": text_written, "interpretation": interpretation}
    print(res)

    return res, ai_predict, chatbot