# child-history / app.py
# yanielbf's picture
# Create app.py
# 48ac779
from transformers import *
import gradio as gr
# All checkpoints below are loaded once at import time (module-level side
# effect); the functions further down read these globals directly.

# Tokenizer/model pair used by generate_simple_text (checkpoint name indicates
# a Spanish mT5 text-simplification model).
tokenizer_s = AutoTokenizer.from_pretrained("oskrmiguel/mt5-simplification-spanish")
model_s = AutoModelForSeq2SeqLM.from_pretrained("oskrmiguel/mt5-simplification-spanish")
# Tokenizer/model pair used by generate_summary (encoder-decoder checkpoint
# whose name indicates Spanish summarization).
tokenizer = BertTokenizerFast.from_pretrained('mrm8488/bert2bert_shared-spanish-finetuned-summarization')
model = EncoderDecoderModel.from_pretrained('mrm8488/bert2bert_shared-spanish-finetuned-summarization')
# Tokenizer/model pair used by generate_questions.
model_q = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/bert2bert-spanish-question-generation")
tokenizer_q = AutoTokenizer.from_pretrained("mrm8488/bert2bert-spanish-question-generation")
# NOTE(review): tokenizer_a and model_a are not referenced anywhere else in the
# visible code; the pipeline below is built from the checkpoint *name*, not
# from these objects, so the same checkpoint is presumably loaded twice —
# confirm whether these two lines are still needed.
tokenizer_a = AutoTokenizer.from_pretrained("mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es")
model_a = AutoModelForQuestionAnswering.from_pretrained("mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es")
# Question-answering pipeline used by generate_answer. The tokenizer is passed
# as a (checkpoint name, kwargs) tuple with use_fast=False.
# NOTE(review): the tuple form of `tokenizer=` looks like a legacy transformers
# convention — confirm it is accepted by the installed transformers version.
nlp_a = pipeline('question-answering', model='mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es',
tokenizer=(
'mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es',
{"use_fast": False}
))
def generate_summary(text, max_input_tokens=64):
    """Summarize ``text`` with the module-level summarization model.

    Args:
        text: The text to summarize.
        max_input_tokens: Token budget for the encoder input. The input is
            padded up to this length and truncated beyond it, so anything
            past the budget is not seen by the model. Defaults to 64, the
            value that used to be hard-coded here.

    Returns:
        The decoded summary string, with special tokens removed.
    """
    inputs = tokenizer(
        [text],
        padding="max_length",
        truncation=True,
        max_length=max_input_tokens,
        return_tensors="pt",
    )
    # The attention mask tells the model which positions are padding.
    output = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask)
    # A single text was submitted, so only the first generated sequence matters.
    return tokenizer.decode(output[0], skip_special_tokens=True)
def generate_simple_text(data):
    """Produce a simplified version of each sentence in ``data``.

    ``data`` is split on "." and every fragment is run through the
    module-level simplification model (``tokenizer_s`` / ``model_s``).

    Args:
        data: Text whose sentences are separated by periods.

    Returns:
        A list with one entry per processed fragment. Each entry is itself a
        one-element list holding the decoded model output for that fragment
        (multiple generated sequences, if any, are joined with newlines).
        The result always contains at least one entry.
    """
    # Splitting on "." leaves blank fragments (e.g. the empty tail after a
    # final period); feeding those to the model only yields noise, so they
    # are skipped. If nothing usable remains, the raw input is processed as a
    # single fragment so the result is never empty — the caller in this file
    # indexes the first entry.
    fragments = [part for part in data.split(".") if part.strip()] or [data]

    outputs = []
    for fragment in fragments:
        inputs = tokenizer_s(
            fragment,
            max_length=1024,
            padding=True,
            truncation=True,
            return_tensors='pt',
        )
        generated = model_s.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=100,
        )
        simplified = '\n'.join(
            tokenizer_s.decode(
                sequence,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )
            for sequence in generated
        )
        # Each result is wrapped in its own list to keep the established
        # return shape (list of single-element lists).
        outputs.append([simplified])
    return outputs
def generate_questions(data):
    """Generate one question per sentence of ``data``.

    ``data`` is split on "." and every fragment is run through the
    module-level question-generation model (``tokenizer_q`` / ``model_q``).

    Args:
        data: Text whose sentences are separated by periods.

    Returns:
        A list of decoded question strings, one per processed fragment, in
        input order. The list always contains at least one entry.
    """
    # Splitting on "." leaves blank fragments (e.g. the empty tail after a
    # final period); generating from those only yields noise, so they are
    # skipped. If nothing usable remains, the raw input is processed as a
    # single fragment so the result is never empty — the caller in this file
    # indexes the first entry.
    fragments = [part for part in data.split(".") if part.strip()] or [data]

    outputs = []
    for fragment in fragments:
        inputs_q = tokenizer_q(fragment, return_tensors="pt")
        outputs_q = model_q.generate(
            inputs_q['input_ids'],
            attention_mask=inputs_q['attention_mask'],
            max_length=100,
        )
        # One fragment in, so only the first generated sequence is decoded.
        outputs.append(tokenizer_q.decode(outputs_q[0], skip_special_tokens=True))
    return outputs
def generate_answer(question_texts, context_text):
    """Answer a question about a context with the module-level QA pipeline.

    Args:
        question_texts: The question to ask.
        context_text: The text in which the answer is looked up.

    Returns:
        The value stored under the 'answer' key of the pipeline result.
    """
    qa_request = {'question': question_texts, 'context': context_text}
    prediction = nlp_a(qa_request)
    return prediction['answer']
def generate_paragraph(data):
    """Split ``data`` into paragraphs, one per non-empty line.

    Lines are separated by "\\n". Only completely empty lines are dropped;
    lines made of whitespace are kept as they are.

    Args:
        data: The raw multi-line text.

    Returns:
        A list with the non-empty lines, in their original order.
    """
    return [line for line in data.split('\n') if line != '']
# Gradio components handed to gr.Interface further down.
# NOTE: generate_question declares parameters named contexto, opciones and
# parrafo_longitud and a local named resultado; inside that function those
# names refer to the submitted values, not to these component objects.

# Multi-line text box (10 lines) where the story is entered.
contexto = gr.inputs.Textbox(lines=10, placeholder="Ingresa un cuento de niños")
# HTML output component that displays the string returned by generate_question.
resultado = gr.outputs.HTML(label="Resultado")
# Processing choices. generate_question matches these exact strings, so the
# labels must stay in sync with it.
# NOTE(review): "Resumir" is offered here but generate_question never checks
# for it — confirm whether the summary is meant to be optional.
opciones = gr.inputs.CheckboxGroup(["Resumir", "Facil Lectura", "Generar Preguntas", "Ver Respuestas"])
# Slider whose value generate_question compares against each paragraph's
# character length. The arguments 50 and 500 are presumably the minimum and
# maximum — confirm against the installed gradio version.
parrafo_longitud = gr.inputs.Slider(50, 500)
def generate_question(contexto, opciones, parrafo_longitud):
    """Build the HTML report for a story: summary, simple text, Q&A.

    The story is split into paragraphs with ``generate_paragraph``. A
    paragraph shorter than ``parrafo_longitud`` characters is used verbatim;
    a longer one is summarized with ``generate_summary`` and the optional
    steps (questions, answers, simplification) operate on that summary.

    Args:
        contexto: The raw story text, one paragraph per line.
        opciones: Collection of selected option labels. Recognized labels are
            "Facil Lectura", "Generar Preguntas" and "Ver Respuestas";
            answers are only produced when questions are also requested.
            ``None`` is treated as "nothing selected".
        parrafo_longitud: Character-length threshold at or above which a
            paragraph is summarized.

    Returns:
        An HTML string with four ``<p>`` sections (Resumen, Texto Simple,
        Preguntas, Respuestas). All four are always emitted, possibly empty.

    NOTE(review): the "Resumir" option is never consulted; the summary
    section is produced unconditionally. Left as is to keep the output stable.
    """
    # Imported locally so this function does not depend on the file header.
    from html import escape

    opciones = opciones or []
    want_simple = "Facil Lectura" in opciones
    want_questions = "Generar Preguntas" in opciones
    want_answers = "Ver Respuestas" in opciones

    resumen = []
    preguntas = []
    simples = []
    respuestas = []
    for numero, text in enumerate(generate_paragraph(contexto), start=1):
        if len(text) < parrafo_longitud:
            # Short paragraph: shown verbatim, no model call. Questions and
            # answers are only generated for summarized paragraphs.
            resumen.append(text)
            if want_simple:
                simples.append(text)
            continue

        sumarize = generate_summary(text)
        resumen.append(sumarize)
        if want_questions:
            # Only the question for the first sentence of the summary is used.
            question = generate_questions(sumarize)[0]
            preguntas.append(f"{numero}-> {question}")
            if want_answers:
                answer = generate_answer(question, sumarize)
                respuestas.append(f"{numero}-> {answer}")
        if want_simple:
            # Only the simplification of the first sentence is used.
            simples.append(generate_simple_text(sumarize)[0][0])

    sections = (
        ("Resumen", resumen),
        ("Texto Simple", simples),
        ("Preguntas", preguntas),
        ("Respuestas", respuestas),
    )
    # Story text and model output are untrusted: escape them so characters
    # such as "<" or "&" are displayed literally instead of being parsed as
    # markup by the HTML output component.
    return "".join(
        "<p><b>" + title + ":</b> "
        + "<br/>".join(escape(entry) for entry in entries)
        + "</p>"
        for title, entries in sections
    )
# Assemble the web UI: generate_question is the callback, fed by the three
# input components and rendering into the HTML output; then start serving
# with debug mode enabled.
iface = gr.Interface(
    generate_question,
    [contexto, opciones, parrafo_longitud],
    resultado,
)
iface.launch(debug=True)