import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import stanza
import re

# Download the English tokenizer and load the role classification model.
stanza.download('en', processors='tokenize')
model = AutoModelForSeq2SeqLM.from_pretrained("fangyuan/lfqa_role_classification")
tokenizer = AutoTokenizer.from_pretrained("fangyuan/lfqa_role_classification")
en_nlp = stanza.Pipeline('en', processors='tokenize')

article = '''
## About
This is a demo for our paper: [How Do We Answer Complex Questions: Discourse Structure of Long-form Answers](https://aclanthology.org/2022.acl-long.249/). Fangyuan Xu, Junyi Jessy Li, Eunsol Choi. 2022.

## Model
The model served here is a T5(large)-based role classification model trained on functional roles of ELI5 answers.

## Resources
Please see more information (paper/code/data/datasheet) at our [website](https://www.cs.utexas.edu/~fxu/lfqa_discourse/index.html).

## Contact
[Fangyuan Xu](https://www.cs.utexas.edu/~fxu/) via firstname@utexas.edu
'''

# Map the raw labels emitted by the T5 model to the display names shown in the demo.
role_mappings = {
    'Answer': 'Answer',
    'Answer (Summary)': 'Summary',
    'Auxiliary Information': 'Auxiliary Information',
    'Answer - Example': 'Example',
    'Miscellaneous': 'Miscellaneous',
    'Answer - Organizational sentence': 'Organizational sentence',
    ' ': ' ',
}


def get_ans_sentence_with_stanza(answer_paragraph, pipeline, is_offset=False):
    '''Sentence segmentation with stanza.

    Returns the sentence strings or, if is_offset is True, their
    (start_char, end_char) offsets in the answer paragraph.
    '''
    answer_paragraph_processed = pipeline(answer_paragraph)
    sentences = []
    for sent in answer_paragraph_processed.sentences:
        if is_offset:
            sentences.append((sent.tokens[0].start_char, sent.tokens[-1].end_char))
        else:
            sentence = answer_paragraph[sent.tokens[0].start_char:sent.tokens[-1].end_char]
            sentences.append(sentence.strip())
    return sentences


def create_input_to_t5(question, answer):
    '''Linearize the question and answer sentences into the model's input format:
    "<question> [1] <sentence 1> [2] <sentence 2> ..."'''
    input_line = [question]
    answer_paragraph = get_ans_sentence_with_stanza(answer, en_nlp)
    for idx, answer_sent in enumerate(answer_paragraph):
        sep_token = '[{}]'.format(idx + 1)  # sentence indices are 1-based
        input_line.append(sep_token)
        input_line.append(answer_sent)
    return ' '.join(input_line)


def process_t5_output(input_txt, output_txt):
    '''Align the predicted roles ("[1] <role> [2] <role> ...") with the answer
    sentences and render one "<sentence> (<role>)" line per sentence.'''
    pred_roles = []
    answer_sentence = re.split(r'\[\d+\] ', input_txt)
    answer_sentence = answer_sentence[1:]  # drop the question
    sentence_idx = re.findall(r'\[\d+\]', input_txt)
    idx_to_sentence = zip(sentence_idx, answer_sentence)
    pred_role = re.split(r'\[\d+\] ', output_txt)[1:]
    pred_idx = re.findall(r'\[\d+\]', output_txt)
    idx_to_role = {
        idx: role.strip() for (idx, role) in zip(pred_idx, pred_role)
    }
    for _, (idx, sentence) in enumerate(idx_to_sentence):
        # Fall back to a blank role if the model skipped this sentence index.
        pred_role = ' ' if idx not in idx_to_role else idx_to_role[idx]
        mapped_pred_role = role_mappings[pred_role]
        pred_roles.append('{} ({})'.format(sentence, mapped_pred_role))
    print(input_txt, output_txt)  # log the raw model input/output
    return '\n'.join(pred_roles)


def predict(question, answer):
    '''Run the role classifier on a question/answer pair.'''
    input_txt = create_input_to_t5(question, answer)
    input_ids = tokenizer(input_txt, return_tensors='pt').input_ids
    outputs = model.generate(input_ids, max_length=512)
    output_txt = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return process_t5_output(input_txt, output_txt)


gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Textbox(lines=1, label="Question:"),
        gr.inputs.Textbox(lines=1, label="Answer:"),
    ],
    outputs=[
        gr.outputs.Textbox(label="Predicted sentence-level functional roles"),
    ],
    theme="peach",
    title="Discourse structure of long-form answers",
    description="Input a question with its long-form answer to see the predicted discourse structure by our role classifier.",
    article=article,
    examples=[
        ['''If a sheep's wool never stops growing, how are they not extinct?''',
         '''It's already answered 
that continuous wool growth has been selected by human breeders, but there's a misconception in your question that I'd like to address.Evolution doesn't select for what is best for *the individual*.Traits that help the individual don't necessarily survive.Only traits that ensure *procreation* survive.The quality of life is no concern to nature.Think of pain.There's absolutely no sense of us feeling excruciating pain.When you're dying, its about as much help to you as a sheep with meter long hair.Pain itself however is very useful during lifetime to avoid injury.An individual capable of feeling pain is much more likely to procreate than an individual which is not.That said, it is very unlikely for an expensive trait like growing massive amounts of wool to occur in wild sheep.However, given the right circumstances, it could well occur.Provided it doesn't hamper reproduction too much.'''],
    ]
).launch(enable_queue=True)
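
# A minimal sketch of calling the classifier directly, without the Gradio UI. It reuses the
# model, tokenizer, and stanza pipeline loaded above; the question/answer pair and the role
# name in the expected-output comment are illustrative, not guaranteed model output. Kept
# commented out because launch() above starts the web app; run it in a separate script or
# session for a quick smoke test.
#
# question = "Why do onions make you cry when you cut them?"
# answer = "Cutting an onion releases a gas. The gas reacts with your eyes and irritates them."
# print(predict(question, answer))
# # create_input_to_t5() produces "<question> [1] <sent 1> [2] <sent 2>", the model returns
# # "[1] <role> [2] <role>", and predict() yields one "<sentence> (<role>)" line per sentence,
# # e.g. "Cutting an onion releases a gas. (Answer)".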