yanielbf commited on
Commit
48ac779
1 Parent(s): 09c0ea3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Explicit imports instead of `from transformers import *`: the wildcard hides
# which names are used and can silently shadow other module-level globals.
from transformers import (
    AutoModelForQuestionAnswering,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    BertTokenizerFast,
    EncoderDecoderModel,
    pipeline,
)
import gradio as gr

# Spanish text-simplification model (mT5 fine-tuned).
tokenizer_s = AutoTokenizer.from_pretrained("oskrmiguel/mt5-simplification-spanish")
model_s = AutoModelForSeq2SeqLM.from_pretrained("oskrmiguel/mt5-simplification-spanish")

# Spanish summarization model (shared-encoder BERT2BERT).
tokenizer = BertTokenizerFast.from_pretrained('mrm8488/bert2bert_shared-spanish-finetuned-summarization')
model = EncoderDecoderModel.from_pretrained('mrm8488/bert2bert_shared-spanish-finetuned-summarization')

# Spanish question-generation model.
model_q = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/bert2bert-spanish-question-generation")
tokenizer_q = AutoTokenizer.from_pretrained("mrm8488/bert2bert-spanish-question-generation")

# Spanish extractive QA. NOTE(review): only the `nlp_a` pipeline below is used
# for answering; `tokenizer_a`/`model_a` are loaded but never referenced —
# kept for compatibility, consider removing after confirming no other user.
tokenizer_a = AutoTokenizer.from_pretrained("mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es")
model_a = AutoModelForQuestionAnswering.from_pretrained("mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es")
nlp_a = pipeline(
    'question-answering',
    model='mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es',
    tokenizer=(
        'mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es',
        {"use_fast": False},
    ),
)
16
def generate_summary(text):
    """Return a summary of *text* from the Spanish BERT2BERT model.

    The input is truncated/padded to 64 tokens before generation; the first
    generated sequence is decoded without special tokens.
    """
    encoded = tokenizer(
        [text],
        padding="max_length",
        truncation=True,
        max_length=64,
        return_tensors="pt",
    )
    generated = model.generate(
        encoded.input_ids, attention_mask=encoded.attention_mask
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
22
def generate_simple_text(data):
    """Simplify *data* fragment by fragment.

    *data* is split on '.'; each fragment is run through the simplification
    model. Returns one single-element list per fragment (the caller indexes
    the result with ``[0][0]``), each holding the newline-joined decoded
    generations for that fragment.
    """
    def _simplify(fragment):
        encoded = tokenizer_s(
            fragment, max_length=1024, padding=True, truncation=True,
            return_tensors='pt',
        )
        generated = model_s.generate(encoded['input_ids'], max_length=100)
        decoded = (
            tokenizer_s.decode(
                seq, skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )
            for seq in generated
        )
        return ['\n'.join(decoded)]

    return [_simplify(fragment) for fragment in data.split(".")]
29
def generate_questions(data):
    """Generate one question per '.'-separated fragment of *data*.

    Each fragment is fed to the question-generation model and the first
    generated sequence is decoded; the questions are returned in order.
    """
    questions = []
    for fragment in data.split("."):
        encoded = tokenizer_q(fragment, return_tensors="pt")
        generated = model_q.generate(encoded['input_ids'], max_length=100)
        questions.append(
            tokenizer_q.decode(generated[0], skip_special_tokens=True)
        )
    return questions
37
def generate_answer(question_texts, context_text):
    """Answer *question_texts* against *context_text* with the QA pipeline."""
    result = nlp_a({'question': question_texts, 'context': context_text})
    return result['answer']
40
def generate_paragraph(data):
    """Split *data* on newlines, dropping empty lines (whitespace-only kept)."""
    return [line for line in data.split('\n') if line != '']
42
# Gradio widgets (legacy gr.inputs / gr.outputs API).
# Story text, one paragraph per line.
contexto = gr.inputs.Textbox(lines=10, placeholder="Ingresa un cuento de niños")
# Rendered HTML result panel.
resultado = gr.outputs.HTML(label="Resultado")
# Feature toggles consumed by generate_question.
opciones = gr.inputs.CheckboxGroup(["Resumir", "Facil Lectura", "Generar Preguntas", "Ver Respuestas"])
# Character-length threshold; shorter paragraphs are kept verbatim.
parrafo_longitud = gr.inputs.Slider(50, 500)
46
def generate_question(contexto, opciones, parrafo_longitud):
    """Build the HTML shown by the Gradio app.

    Parameters
    ----------
    contexto : str
        The children's story, one paragraph per line.
    opciones : list[str]
        Selected checkboxes: "Facil Lectura", "Generar Preguntas",
        "Ver Respuestas". NOTE(review): "Resumir" is offered in the UI but
        never checked here — long paragraphs are always summarized; confirm
        whether that is intended.
    parrafo_longitud : int
        Paragraphs shorter than this many characters are kept verbatim
        instead of being summarized.

    Returns
    -------
    str
        HTML containing the non-empty sections: summary, simplified text,
        questions, answers.
    """
    parrafos = generate_paragraph(contexto)
    resumen = []
    preguntas = []
    simples = []
    respuestas = []
    for i, text in enumerate(parrafos):
        if len(text) < parrafo_longitud:
            # Short paragraph: kept as-is, and reused as its own simple form.
            resumen.append(text)
            if "Facil Lectura" in opciones:
                simples.append(text)
        else:
            sumarize = generate_summary(text)
            resumen.append(sumarize)
            if "Generar Preguntas" in opciones:
                questions = generate_questions(sumarize)
                preguntas.append(str(i + 1) + "-> " + questions[0])
                # Answers only make sense for a generated question, so the
                # check is nested under "Generar Preguntas".
                if "Ver Respuestas" in opciones:
                    respuestas.append(
                        str(i + 1) + "-> "
                        + generate_answer(questions[0], sumarize)
                    )
            if "Facil Lectura" in opciones:
                simples.append(generate_simple_text(sumarize)[0][0])
    # Fix: emit only sections that have content. The original always printed
    # all four headers, showing empty "Preguntas:"/"Respuestas:"/"Texto
    # Simple:" blocks when their options were unchecked.
    resultado = ""
    for titulo, items in (
        ("Resumen", resumen),
        ("Texto Simple", simples),
        ("Preguntas", preguntas),
        ("Respuestas", respuestas),
    ):
        if items:
            resultado += "<p><b>" + titulo + ":</b> " + '<br/>'.join(items) + "</p>"
    return resultado
73
# Wire the widgets to the handler and start the app (debug surfaces errors
# in the browser while developing).
iface = gr.Interface(
    fn=generate_question,
    inputs=[contexto, opciones, parrafo_longitud],
    outputs=resultado,
)
iface.launch(debug=True)