import os
import re

import gradio as gr
import stanza
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Download the English tokenizer used for sentence segmentation.
stanza.download('en', processors='tokenize')

# Load the fine-tuned T5 role classifier and its tokenizer.
model = AutoModelForSeq2SeqLM.from_pretrained("fangyuan/lfqa_role_classification")
tokenizer = AutoTokenizer.from_pretrained("fangyuan/lfqa_role_classification")
en_nlp = stanza.Pipeline('en', processors='tokenize')

article = '''
## About

This is a demo for our paper: [How Do We Answer Complex Questions: Discourse Structure of Long-form Answers](https://aclanthology.org/2022.acl-long.249/). Fangyuan Xu, Junyi Jessy Li, Eunsol Choi. 2022.

## Model

The model served here is a T5 (large)-based role classification model trained on functional roles of ELI5 answers.

## Resources

Please see more information (paper/code/data/datasheet) at our [website](https://www.cs.utexas.edu/~fxu/lfqa_discourse/index.html).

## Contact

[Fangyuan Xu](https://www.cs.utexas.edu/~fxu/) via firstname@utexas.edu
'''

# Map the labels predicted by the model to the display names shown in the output.
# The blank entry covers sentences for which the model predicted no role.
role_mappings = {
    'Answer': 'Answer',
    'Answer (Summary)': 'Summary',
    'Auxiliary Information': 'Auxiliary Information',
    'Answer - Example': 'Example',
    'Miscellaneous': 'Miscellaneous',
    'Answer - Organizational sentence': 'Organizational sentence',
    ' ': ' ',
}


def get_ans_sentence_with_stanza(answer_paragraph, pipeline, is_offset=False):
    '''Segment an answer paragraph into sentences with stanza.

    Returns (start_char, end_char) offsets if is_offset is True,
    otherwise the stripped sentence strings.
    '''
    answer_paragraph_processed = pipeline(answer_paragraph)
    sentences = []
    for sent in answer_paragraph_processed.sentences:
        if is_offset:
            sentences.append((sent.tokens[0].start_char, sent.tokens[-1].end_char))
        else:
            sentence = answer_paragraph[sent.tokens[0].start_char:sent.tokens[-1].end_char]
            sentences.append(sentence.strip())
    return sentences


def create_input_to_t5(question, answer):
    '''Serialize the question and index-marked answer sentences into the T5 input string.'''
    input_line = [question]
    answer_paragraph = get_ans_sentence_with_stanza(answer, en_nlp)
    for idx, answer_sent in enumerate(answer_paragraph):
        sep_token = '[{}]'.format(idx + 1)  # sentence indices are 1-based
        input_line.append(sep_token)
        input_line.append(answer_sent)
    return ' '.join(input_line)


def process_t5_output(input_txt, output_txt):
    '''Align the predicted roles with the answer sentences, one "sentence (role)" per line.'''
    pred_roles = []
    answer_sentence = re.split(r'\[\d+\] ', input_txt)[1:]  # drop the question
    sentence_idx = re.findall(r'\[\d+\]', input_txt)
    idx_to_sentence = zip(sentence_idx, answer_sentence)
    pred_role = re.split(r'\[\d+\] ', output_txt)[1:]
    pred_idx = re.findall(r'\[\d+\]', output_txt)
    idx_to_role = {
        idx: role.strip() for (idx, role) in zip(pred_idx, pred_role)
    }
    for idx, sentence in idx_to_sentence:
        pred_role = idx_to_role.get(idx, ' ')  # blank role if the model skipped this index
        mapped_pred_role = role_mappings[pred_role]
        pred_roles.append('{} ({})'.format(sentence, mapped_pred_role))
    print(input_txt, output_txt)  # log model input/output for debugging
    return '\n'.join(pred_roles)


def predict(question, answer):
    input_txt = create_input_to_t5(question, answer)
    input_ids = tokenizer(input_txt, return_tensors='pt').input_ids
    outputs = model.generate(input_ids, max_length=512)
    output_txt = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return process_t5_output(input_txt, output_txt)


gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Textbox(lines=1, label="Question:"),
        gr.inputs.Textbox(lines=1, label="Answer:"),
    ],
    outputs=[
        gr.outputs.Textbox(label="Predicted sentence-level functional roles"),
    ],
    theme="peach",
    title="Discourse structure of long-form answers",
    description="Input a question with its long-form answer to see the discourse structure predicted by our role classifier.",
    article=article,
    examples=[
        ['''If a sheep's wool never stops growing, how are they not extinct?''',
         '''It's already answered that continuous wool growth has been selected by human breeders, but there's a misconception in your question that I'd like to address. Evolution doesn't select for what is best for *the individual*. Traits that help the individual don't necessarily survive. Only traits that ensure *procreation* survive. The quality of life is no concern to nature. Think of pain. There's absolutely no sense of us feeling excruciating pain. When you're dying, it's about as much help to you as a sheep with meter-long hair. Pain itself however is very useful during lifetime to avoid injury. An individual capable of feeling pain is much more likely to procreate than an individual which is not. That said, it is very unlikely for an expensive trait like growing massive amounts of wool to occur in wild sheep. However, given the right circumstances, it could well occur. Provided it doesn't hamper reproduction too much.'''],
        ['''Why don't some planets in our solar system orbit the other way around the Sun?''',
         '''Try to imagine the solar system before there was even really a star. There would have been a cloud of material flying all around. If everything was very random, then very little would have enough speed to avoid being sucked into the newly forming star. If some things had a velocity in one direction and other things had a velocity in another direction, then they would likely end up dragging on each other, slow down, and get pulled in. What we think happened is that as the sun was forming, it acquired a spin and that spin ended up transferring to the cloud of material, shaping it into a disc which eventually would collect together to form the planets. Without this spin, the material would have just fallen into the sun and it would have burned a little bit brighter.'''],
        ['''Why are skyscraper windows still washed by hand?''',
         '''I worked on a window-washing robot that cleaned acres of rooftops over a huge commercial greenhouse. Worked great, except when it didn't, and would either break down completely or just get lost and start climbing the wrong parts of the structure. Then repair techs and manual window washers still have to be employed. I think this ends up being a cost/benefit problem where the reliability of our robots and price of implementation isn't quite at the point where it makes this commercially viable for skyscrapers. For what it's worth, I think the Twin Towers actually used a washer robot on the upper floors to limited success.'''],
    ],
).launch(enable_queue=True)
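
# For reference, a minimal sketch of the round trip performed by the helper
# functions above. The question, answer, and role labels here are illustrative
# placeholders, not actual model output:
#
#   create_input_to_t5(
#       "Why is the sky blue?",
#       "Sunlight scatters off air molecules. Blue light scatters the most.",
#   )
#   # -> 'Why is the sky blue? [1] Sunlight scatters off air molecules. [2] Blue light scatters the most.'
#
# The fine-tuned T5 model then emits role labels keyed by the same sentence
# indices (e.g. '[1] Answer [2] Answer'), which process_t5_output() aligns with
# the original sentences and renders as one "<sentence> (<role>)" pair per line.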