# lfqa_discourse / app.py
# (Hugging Face Space page residue: author "carrie", commit "quick fix" 55ea91c,
#  raw / history / blame links, 3.85 kB — kept as a comment; not executable code.)
import os
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import stanza
import re
# One-time download of the English tokenizer resources (requires network access).
stanza.download('en', processors='tokenize')
# T5-based sentence-role classifier fine-tuned on ELI5 long-form answers.
model = AutoModelForSeq2SeqLM.from_pretrained("fangyuan/lfqa_role_classification")
tokenizer = AutoTokenizer.from_pretrained("fangyuan/lfqa_role_classification")
# Sentence-segmentation-only stanza pipeline used to split answers into sentences.
en_nlp = stanza.Pipeline('en', processors='tokenize')
# Markdown rendered beneath the demo via gr.Interface(article=...).
article='''
## About
This is a demo for our paper: [How Do We Answer Complex Questions: Discourse Structure of Long-form Answers](https://aclanthology.org/2022.acl-long.249/).
Fangyuan Xu, Junyi Jessy Li, Eunsol Choi. 2022.
## Model
The model served here is a T5(large)-based role classification model trained on functional roles of ELI5 answers.
## Resources
Please see more information (paper/code/data/datasheet) at our [website](https://www.cs.utexas.edu/~fxu/lfqa_discourse/index.html).
## Contact
[Fangyuan Xu](https://www.cs.utexas.edu/~fxu/) via firstname@utexas.edu
'''
# Maps the raw role label emitted by the T5 model to the display name shown in
# the UI. The ' ' entry covers sentences for which the model predicted no role.
role_mappings = {
'Answer': 'Answer',
'Answer (Summary)': 'Summary',
'Auxiliary Information': 'Auxiliary Information',
'Answer - Example': 'Example',
'Miscellaneous': 'Miscellaneous',
'Answer - Organizational sentence': 'Organizational sentence',
' ': ' ',
}
def get_ans_sentence_with_stanza(answer_paragraph, pipeline,
    is_offset=False):
    '''Segment an answer paragraph into sentences using a stanza pipeline.

    Args:
        answer_paragraph: Raw answer text to segment.
        pipeline: A stanza Pipeline (or compatible callable) whose result has
            `.sentences`, each holding `.tokens` with start_char/end_char.
        is_offset: If True, return (start_char, end_char) spans instead of text.

    Returns:
        A list of stripped sentence strings, or of character-offset tuples.
    '''
    answer_paragraph_processed = pipeline(answer_paragraph)
    sentences = []
    for sent in answer_paragraph_processed.sentences:
        start = sent.tokens[0].start_char
        end = sent.tokens[-1].end_char  # stanza end_char is an exclusive offset
        if is_offset:
            sentences.append((start, end))
        else:
            # Slice to `end` (exclusive). The previous `end + 1` pulled in one
            # extra character from the following sentence whenever two
            # sentences were not separated by whitespace.
            sentences.append(answer_paragraph[start:end].strip())
    return sentences
def create_input_to_t5(question, answer):
    '''Build the T5 input: the question followed by "[i] sentence" pairs.'''
    sentences = get_ans_sentence_with_stanza(answer, en_nlp)
    parts = [question]
    for i, sentence in enumerate(sentences):
        parts.extend(('[{}]'.format(i), sentence))
    return ' '.join(parts)
def process_t5_output(input_txt, output_txt):
    '''Align predicted roles with the input answer sentences for display.

    Args:
        input_txt: T5 input of the form 'question [0] sent0 [1] sent1 ...'.
        output_txt: T5 output of the form '[0] Role [1] Role ...'.

    Returns:
        One line per answer sentence, 'sentence: mapped role', followed by the
        raw model output on a final line.
    '''
    pred_roles = []
    # Recover the answer sentences and their '[i]' markers from the model input
    # (index 0 of the split is the question, so drop it). Raw strings avoid the
    # invalid-escape-sequence warning the plain '\[\d+\] ' literals produced.
    answer_sentence = re.split(r'\[\d+\] ', input_txt)[1:]
    sentence_idx = re.findall(r'\[\d+\]', input_txt)
    idx_to_sentence = zip(sentence_idx, answer_sentence)
    # Map each predicted '[i]' marker to its (stripped) role label.
    pred_labels = re.split(r'\[\d+\] ', output_txt)[1:]
    pred_idx = re.findall(r'\[\d+\]', output_txt)
    idx_to_role = {
        idx: role.strip() for (idx, role) in zip(pred_idx, pred_labels)
    }
    for idx, sentence in idx_to_sentence:
        # Sentences the model skipped get the blank role ' '.
        pred_role = idx_to_role.get(idx, ' ')
        # Fall back to the raw label for role names the model emits that are
        # not in role_mappings, instead of raising KeyError.
        mapped_pred_role = role_mappings.get(pred_role, pred_role)
        pred_roles.append('{}: {}'.format(sentence, mapped_pred_role))
    pred_roles.append(output_txt)  # keep the raw model output for inspection
    return '\n'.join(pred_roles)
def predict(question, answer):
    '''Classify the functional role of each sentence in a long-form answer.'''
    t5_input = create_input_to_t5(question, answer)
    encoded = tokenizer(t5_input, return_tensors='pt').input_ids
    generated = model.generate(encoded, max_length=512)
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
    return process_t5_output(t5_input, decoded)
# Wire the classifier into a Gradio UI and launch it with request queueing on.
# NOTE(review): gr.inputs/gr.outputs and enable_queue are the legacy pre-3.x
# Gradio API — this Space presumably pins an old gradio version; verify before
# upgrading.
gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Textbox(lines=1, label="Question:"),
        gr.inputs.Textbox(lines=1, label="Answer:"),
    ],
    outputs=[
        gr.outputs.Textbox(label="Predicted sentence-level functional roles"),
    ],
    theme="peach",
    title="Discourse structure of long-form answer",
    description="Input a question with its long-form answer to see the predicted discourse structure by our role classifier.",
    article=article,
    examples=[
    #['', '']
    ]
).launch(enable_queue=True)