File size: 3,936 Bytes
48ac779
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from transformers import *
import gradio as gr
# Hugging Face Hub checkpoint ids, one per task, named once so each
# tokenizer/model pair is guaranteed to come from the same checkpoint.
SIMPLIFICATION_CHECKPOINT = "oskrmiguel/mt5-simplification-spanish"
SUMMARIZATION_CHECKPOINT = 'mrm8488/bert2bert_shared-spanish-finetuned-summarization'
QUESTION_GENERATION_CHECKPOINT = "mrm8488/bert2bert-spanish-question-generation"
QUESTION_ANSWERING_CHECKPOINT = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# Text simplification (used by generate_simple_text).
tokenizer_s = AutoTokenizer.from_pretrained(SIMPLIFICATION_CHECKPOINT)
model_s = AutoModelForSeq2SeqLM.from_pretrained(SIMPLIFICATION_CHECKPOINT)

# Summarization (used by generate_summary).
tokenizer = BertTokenizerFast.from_pretrained(SUMMARIZATION_CHECKPOINT)
model = EncoderDecoderModel.from_pretrained(SUMMARIZATION_CHECKPOINT)

# Question generation (used by generate_questions).
model_q = AutoModelForSeq2SeqLM.from_pretrained(QUESTION_GENERATION_CHECKPOINT)
tokenizer_q = AutoTokenizer.from_pretrained(QUESTION_GENERATION_CHECKPOINT)

# Question answering. The tokenizer/model pair is loaded explicitly and the
# pipeline below additionally loads the same checkpoint by name.
tokenizer_a = AutoTokenizer.from_pretrained(QUESTION_ANSWERING_CHECKPOINT)
model_a = AutoModelForQuestionAnswering.from_pretrained(QUESTION_ANSWERING_CHECKPOINT)
nlp_a = pipeline(
    'question-answering',
    model=QUESTION_ANSWERING_CHECKPOINT,
    # (checkpoint name, tokenizer kwargs): request the non-fast tokenizer.
    tokenizer=(QUESTION_ANSWERING_CHECKPOINT, {"use_fast": False}),
)
def generate_summary(text, max_length=64):
    """Summarize ``text`` with the module-level summarization model.

    Args:
        text: The text to summarize.
        max_length: Number of tokens the input is padded/truncated to before
            it is passed to the model. Defaults to 64, the value that used to
            be hard-coded here, so existing callers behave exactly as before.
            Input beyond this budget is cut off by the tokenizer
            (``truncation=True``).

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    inputs = tokenizer(
        [text],
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    # The attention mask is passed along so the padding added above is ignored.
    output = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask)
    return tokenizer.decode(output[0], skip_special_tokens=True)
def generate_simple_text(data):
    """Run the simplification model over each sentence of ``data``.

    ``data`` is split on "." and every non-blank piece is processed on its
    own with the module-level ``tokenizer_s`` / ``model_s``.

    Args:
        data: Text whose sentences are separated by periods.

    Returns:
        A list with one single-element list per processed sentence. The inner
        element is the decoded model output for that sentence (several
        generated sequences, if any, are joined with newlines). The list
        always contains at least one entry.
    """
    # str.split(".") leaves an empty piece after a trailing period and between
    # consecutive periods; sending those to the model only wastes an inference
    # pass and adds a junk entry to the result.
    sentences = [piece for piece in data.split(".") if piece.strip()]
    if not sentences:
        # Preserve the long-standing guarantee of a non-empty result so that
        # callers indexing [0][0] keep working on blank input.
        sentences = [data]

    outputs = []
    for sentence in sentences:
        inputs = tokenizer_s(sentence, max_length=1024, padding=True, truncation=True, return_tensors='pt')
        generated = model_s.generate(inputs['input_ids'], max_length=100)
        decoded = [
            tokenizer_s.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False)
            for g in generated
        ]
        outputs.append(['\n'.join(decoded)])
    return outputs
def generate_questions(data):
    """Generate one question per sentence of ``data``.

    ``data`` is split on "." and every non-blank piece is passed to the
    module-level question-generation model (``tokenizer_q`` / ``model_q``).

    Args:
        data: Text whose sentences are separated by periods.

    Returns:
        A list of question strings, one per processed sentence, in order.
        The list always contains at least one entry.
    """
    # str.split(".") leaves an empty piece after a trailing period and between
    # consecutive periods; generating a "question" for those is wasted work
    # and produces meaningless output.
    sentences = [piece for piece in data.split(".") if piece.strip()]
    if not sentences:
        # Preserve the long-standing guarantee of a non-empty result so that
        # callers indexing [0] keep working on blank input.
        sentences = [data]

    outputs = []
    for sentence in sentences:
        inputs_q = tokenizer_q(sentence, return_tensors="pt")
        outputs_q = model_q.generate(inputs_q['input_ids'], max_length=100)
        # Only the first generated sequence is kept for each sentence.
        outputs.append(tokenizer_q.decode(outputs_q[0], skip_special_tokens=True))
    return outputs
def generate_answer(question_texts, context_text):
    """Answer a question from a context using the ``nlp_a`` pipeline.

    Args:
        question_texts: The question to answer.
        context_text: The text the answer is looked up in.

    Returns:
        The ``'answer'`` field of the pipeline's result.
    """
    qa_input = {'question': question_texts, 'context': context_text}
    prediction = nlp_a(qa_input)
    return prediction['answer']
def generate_paragraph(data):
    """Split ``data`` on newlines and return the non-empty lines in order.

    Lines that contain only whitespace are kept; only exact empty strings
    are dropped.
    """
    return [line for line in data.split('\n') if line != '']
# Widgets handed to gr.Interface further down in this file.

# Input: multi-line text box for the text to process.
contexto = gr.inputs.Textbox(
    lines=10,
    placeholder="Ingresa un cuento de niños",
)

# Output: HTML panel that receives the string returned by generate_question.
resultado = gr.outputs.HTML(label="Resultado")

# Input: processing steps the user can select; generate_question tests these
# labels with `in`, so the strings must match exactly.
opciones = gr.inputs.CheckboxGroup(
    ["Resumir", "Facil Lectura", "Generar Preguntas", "Ver Respuestas"]
)

# Input: slider (50 to 500) whose value generate_question compares against
# each paragraph's length.
parrafo_longitud = gr.inputs.Slider(50, 500)
def generate_question(contexto, opciones, parrafo_longitud):
    """Build the HTML report for a piece of user-entered text.

    The text is split into paragraphs (one per non-empty line). A paragraph
    shorter than ``parrafo_longitud`` characters is used verbatim; a longer
    one is summarized, and the summary is what the question, the answer and
    the simplified text are derived from.

    Args:
        contexto: Raw text entered by the user.
        opciones: Collection of selected option labels. "Facil Lectura" adds
            simplified text, "Generar Preguntas" adds one question per
            summarized paragraph, and "Ver Respuestas" adds its answer (only
            when questions are also requested). "Resumir" is not consulted:
            the summary section is always produced.
        parrafo_longitud: Length threshold, in characters, below which a
            paragraph is kept as-is instead of being summarized.

    Returns:
        An HTML string with four ``<p>`` sections (Resumen, Texto Simple,
        Preguntas, Respuestas); entries inside a section are separated by
        ``<br/>``. Sections for steps that were not requested are empty.
    """
    # Function-scope import so the block does not depend on the file header.
    import html

    want_simple = "Facil Lectura" in opciones
    want_questions = "Generar Preguntas" in opciones
    want_answers = "Ver Respuestas" in opciones

    resumen = []
    preguntas = []
    simples = []
    respuestas = []
    for i, text in enumerate(generate_paragraph(contexto)):
        if len(text) < parrafo_longitud:
            # Short paragraphs are passed through untouched.
            resumen.append(text)
            if want_simple:
                simples.append(text)
            continue

        sumarize = generate_summary(text)
        resumen.append(sumarize)
        if want_questions:
            # Only the first question generated for the summary is shown.
            question = generate_questions(sumarize)[0]
            preguntas.append(str(i + 1) + "-> " + question)
            if want_answers:
                respuestas.append(str(i + 1) + "-> " + generate_answer(question, sumarize))
        if want_simple:
            # Only the simplification of the summary's first sentence is shown.
            simples.append(generate_simple_text(sumarize)[0][0])

    def render_section(title, entries):
        # Both the user's text and the model output are untrusted here: escape
        # them so "<", ">" and "&" show up as text instead of being
        # interpreted as markup by the HTML output component.
        body = '<br/>'.join(html.escape(entry) for entry in entries)
        return "<p><b>" + title + ":</b> " + body + "</p>"

    return "".join([
        render_section("Resumen", resumen),
        render_section("Texto Simple", simples),
        render_section("Preguntas", preguntas),
        render_section("Respuestas", respuestas),
    ])
# Connect generate_question to its widgets: the order of `inputs` matches the
# function's positional parameters (contexto, opciones, parrafo_longitud).
iface = gr.Interface(
    fn=generate_question,
    inputs=[contexto, opciones, parrafo_longitud],
    outputs=resultado,
)

# Start the app as soon as this module is executed, with debug mode enabled.
iface.launch(debug=True)