import torch
import gradio as gr
from transformers import BertForQuestionAnswering, BertTokenizerFast

# Load the tokenizer and the fine-tuned extractive question-answering model.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
model = BertForQuestionAnswering.from_pretrained("CountingMstar/ai-tutor-bert-model").to(device)


def get_prediction(context, question):
    # Encode the (question, context) pair and pick the most likely answer span
    # from the start/end logits.
    inputs = tokenizer.encode_plus(question, context, return_tensors='pt').to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    answer_start = torch.argmax(outputs.start_logits)
    answer_end = torch.argmax(outputs.end_logits) + 1
    answer = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end])
    )
    return answer


def question_answer(context, question):
    prediction = get_prediction(context, question)
    return prediction


def submit(context, question):
    answer = question_answer(context, question)
    return answer


examples = [
    ["A large language model (LLM) is a type of language model notable for its ability to achieve general-purpose language understanding and generation. LLMs acquire these abilities by using massive amounts of data to learn billions of parameters during training and consuming large computational resources during their training and operation.[1] LLMs are artificial neural networks (mainly transformers[2]) and are (pre-)trained using self-supervised learning and semi-supervised learning.",
     "What is large language model?"],
    ["Feature engineering or feature extraction or feature discovery is the process of extracting features (characteristics, properties, attributes) from raw data. Due to deep learning networks, such as convolutional neural networks, that are able to learn features by themselves, domain-specific-based feature engineering has become obsolete for vision and speech processing. Other examples of features in physics include the construction of dimensionless numbers such as Reynolds number in fluid dynamics; then Nusselt number in heat transfer; Archimedes number in sedimentation; construction of first approximations of the solution such as analytical strength of materials solutions in mechanics, etc.",
     "What is Feature engineering?"],
    ["It calculates soft weights for each word, more precisely for its embedding, in the context window. It can do it either in parallel (such as in transformers) or sequentially (such as recurrent neural networks). Soft weights can change during each runtime, in contrast to hard weights, which are (pre-)trained and fine-tuned and remain frozen afterwards. Attention was developed to address the weaknesses of recurrent neural networks, where words in a sentence are slowly processed one at a time. Machine learning-based attention is a mechanism mimicking cognitive attention. Recurrent neural networks favor more recent words at the end of a sentence while earlier words fade away in volatile neural activations. Attention gives all words equal access to any part of a sentence in a faster parallel scheme and no longer suffers the wait time of serial processing. Earlier uses attached this mechanism to a serial recurrent neural network's language translation system (below), but later uses in Transformers large language models removed the recurrent neural network and relied heavily on the faster parallel attention scheme.",
     "What is Attention mechanism?"],
]

markdown_text = """
# AI Tutor BERT

This is a BERT model fine-tuned on artificial intelligence (AI) terms and their explanations.
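# The dataset description above says each training example is a (context, question,
# answer) triple, with the answer span contained in the context, and that the data was
# augmented by shuffling the sentence order of the context. The real preprocessing code
# is not part of this app; the helper below is only a minimal, hypothetical sketch of
# that augmentation idea and is not used by the interface.
import random


def shuffle_context_sentences(context):
    """Return a copy of `context` with its sentences in a random order (sketch only)."""
    sentences = [s.strip() for s in context.split('.') if s.strip()]
    random.shuffle(sentences)
    return '. '.join(sentences) + '.'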
## Model

https://huggingface.co/bert-base-uncased

The model is BERT, developed by Google and one of the best-known natural language processing models; see the page above for details. Since a tutor's main job is answering questions, the question-answering variant of BERT is used here.

## Dataset

### Wikipedia
https://en.wikipedia.org/wiki/Main_Page

### activeloop
https://www.activeloop.ai/resources/glossary/arima-models/

### Adrien Beaulieu
https://product.house/100-ai-glossary-terms-explained-to-the-rest-of-us/

The training dataset consists of three parts: an AI-related context, a question, and an answer. The answer (label) is contained within the context, and the data was augmented by shuffling the sentence order of the context. The question is the AI term the passage is about; the examples above should make this format easy to follow. There are about 3,300 examples in total, stored as pickle files in the data folder. They were produced by extracting and processing HTML from Wikipedia and the other sites listed above.

## How to use

Sample inputs are listed under 'Examples'. Enter a related passage in 'Context' and the term you want defined in 'Question', then press the 'Submit' button to get an explanation of that term.
"""

# Two text inputs and one text output; markdown_text is rendered below the interface
# as the project description. live is left at its default (False) so the Submit
# button is shown, matching the "How to use" instructions above.
iface = gr.Interface(
    fn=submit,
    inputs=[gr.Textbox(label="Context"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
    examples=examples,
    title="BERT Question Answering",
    article=markdown_text,
)

iface.launch()
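# Programmatic usage sketch (illustrative, mirroring the 'How to use' steps above):
# the same prediction can be obtained without the UI by calling the function directly.
# The exact answer text depends on the fine-tuned weights, so no output is asserted here.
#
#     sample_context, sample_question = examples[0]
#     print(get_prediction(sample_context, sample_question))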