import os
import re

import gradio as gr
import stanza
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

stanza.download('en', processors='tokenize')
model = AutoModelForSeq2SeqLM.from_pretrained("fangyuan/lfqa_role_classification")
tokenizer = AutoTokenizer.from_pretrained("fangyuan/lfqa_role_classification")
en_nlp = stanza.Pipeline('en', processors='tokenize')

article = '''
## About
This is a demo for our paper: [How Do We Answer Complex Questions: Discourse Structure of Long-form Answers](https://aclanthology.org/2022.acl-long.249/).
Fangyuan Xu, Junyi Jessy Li, Eunsol Choi. 2022.
## Model
The model served here is a T5(large)-based role classification model trained on functional roles of ELI5 answers.
## Resources
Please see more information (paper/code/data/datasheet) at our [website](https://www.cs.utexas.edu/~fxu/lfqa_discourse/index.html).
## Contact
[Fangyuan Xu](https://www.cs.utexas.edu/~fxu/) via firstname@utexas.edu
'''

# Map the role labels predicted by the model to the display names shown in the demo output.
role_mappings = {
    'Answer': 'Answer',
    'Answer (Summary)': 'Summary',
    'Auxiliary Information': 'Auxiliary Information',
    'Answer - Example': 'Example',
    'Miscellaneous': 'Miscellaneous',
    'Answer - Organizational sentence': 'Organizational sentence',
    ' ': ' ',
}


def get_ans_sentence_with_stanza(answer_paragraph, pipeline, is_offset=False):
    '''Sentence segmentation with stanza.'''
    answer_paragraph_processed = pipeline(answer_paragraph)
    sentences = []
    for sent in answer_paragraph_processed.sentences:
        if is_offset:
            # Return (start_char, end_char) offsets instead of the sentence text.
            sentences.append((sent.tokens[0].start_char, sent.tokens[-1].end_char))
        else:
            sentence = answer_paragraph[sent.tokens[0].start_char:sent.tokens[-1].end_char + 1]
            sentences.append(sentence.strip())
    return sentences
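
# Illustrative (hypothetical) example of the segmentation helper:
#   get_ans_sentence_with_stanza("It rains. It pours.", en_nlp)
#   -> ["It rains.", "It pours."]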


def create_input_to_t5(question, answer):
    '''Linearize the question and the segmented answer sentences into a single T5 input string.'''
    input_line = [question]
    answer_paragraph = get_ans_sentence_with_stanza(answer, en_nlp)
    for idx, answer_sent in enumerate(answer_paragraph):
        sep_token = '[{}]'.format(idx)
        input_line.append(sep_token)
        input_line.append(answer_sent)
    return ' '.join(input_line)
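
# Illustrative (hypothetical) input produced by create_input_to_t5:
#   "Why is the sky blue? [0] Sunlight scatters off air molecules. [1] Blue light scatters the most."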


def process_t5_output(input_txt, output_txt):
    '''Align the predicted roles in the T5 output with the answer sentences in the input.'''
    pred_roles = []
    # Both input and output use "[idx]" markers, e.g. "[0] ... [1] ...".
    answer_sentence = re.split(r'\[\d+\] ', input_txt)
    answer_sentence = answer_sentence[1:]
    sentence_idx = re.findall(r'\[\d+\]', input_txt)
    idx_to_sentence = zip(sentence_idx, answer_sentence)
    pred_role = re.split(r'\[\d+\] ', output_txt)[1:]
    pred_idx = re.findall(r'\[\d+\]', output_txt)
    idx_to_role = {
        idx: role.strip() for (idx, role) in zip(pred_idx, pred_role)
    }
    for idx, sentence in idx_to_sentence:
        # Fall back to a blank role if the model did not emit a prediction for this index.
        pred_role = ' ' if idx not in idx_to_role else idx_to_role[idx]
        mapped_pred_role = role_mappings[pred_role]
        pred_roles.append('{}: {}'.format(sentence, mapped_pred_role))
    # Append the raw model output for reference.
    pred_roles.append(output_txt)
    return '\n'.join(pred_roles)


def predict(question, answer):
    '''Run the role classifier on a (question, answer) pair and return per-sentence role labels.'''
    input_txt = create_input_to_t5(question, answer)
    input_ids = tokenizer(input_txt, return_tensors='pt').input_ids
    outputs = model.generate(input_ids, max_length=512)
    output_txt = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return process_t5_output(input_txt, output_txt)
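
# Illustrative (hypothetical) usage of the prediction function:
#   print(predict("Why is the sky blue?",
#                 "Sunlight scatters off air molecules. Blue light scatters more than red light."))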


gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(lines=1, label="Question:"),
        gr.Textbox(lines=1, label="Answer:"),
    ],
    outputs=[
        gr.Textbox(label="Predicted sentence-level functional roles"),
    ],
    title="Discourse structure of long-form answers",
    description="Input a question with its long-form answer to see the discourse structure predicted by our role classifier.",
    article=article,
    examples=[
        # ['', '']
    ],
).queue().launch()