from transformers import * import gradio as gr tokenizer_s = AutoTokenizer.from_pretrained("oskrmiguel/mt5-simplification-spanish") model_s = AutoModelForSeq2SeqLM.from_pretrained("oskrmiguel/mt5-simplification-spanish") tokenizer = BertTokenizerFast.from_pretrained('mrm8488/bert2bert_shared-spanish-finetuned-summarization') model = EncoderDecoderModel.from_pretrained('mrm8488/bert2bert_shared-spanish-finetuned-summarization') model_q = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/bert2bert-spanish-question-generation") tokenizer_q = AutoTokenizer.from_pretrained("mrm8488/bert2bert-spanish-question-generation") tokenizer_a = AutoTokenizer.from_pretrained("mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es") model_a = AutoModelForQuestionAnswering.from_pretrained("mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es") nlp_a = pipeline('question-answering', model='mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es', tokenizer=( 'mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es', {"use_fast": False} )) def generate_summary(text): inputs = tokenizer([text], padding="max_length", truncation=True, max_length=64, return_tensors="pt") input_ids = inputs.input_ids attention_mask = inputs.attention_mask output = model.generate(input_ids, attention_mask=attention_mask) return tokenizer.decode(output[0], skip_special_tokens=True) def generate_simple_text(data): outputs = [] for text in data.split("."): inputs = tokenizer_s(text, max_length=1024, padding=True, truncation=True, return_tensors='pt') output = model_s.generate(inputs['input_ids'], max_length=100) outputs.append(['\n'.join([tokenizer_s.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in output])]) return outputs def generate_questions(data): outputs = [] for text in data.split("."): inputs_q = tokenizer_q(text, return_tensors="pt") outputs_q = model_q.generate(inputs_q['input_ids'], max_length=100) question = tokenizer_q.decode(outputs_q[0], skip_special_tokens=True) outputs.append(question) return outputs def generate_answer(question_texts, context_text): return nlp_a({'question': question_texts,'context': context_text } )['answer'] def generate_paragraph(data): return list(filter(lambda x : x != '', data.split('\n'))) contexto = gr.inputs.Textbox(lines=10, placeholder="Ingresa un cuento de niƱos") resultado = gr.outputs.HTML(label="Resultado") opciones = gr.inputs.CheckboxGroup(["Resumir", "Facil Lectura", "Generar Preguntas", "Ver Respuestas"]) parrafo_longitud = gr.inputs.Slider(50, 500) def generate_question(contexto,opciones, parrafo_longitud): parrafos = generate_paragraph(contexto) resultado="" resumen=[] preguntas=[] simples=[] respuestas=[] for i, text in enumerate(parrafos): if len(text) < parrafo_longitud: resumen.append(text) if "Facil Lectura" in opciones: simples.append(text) else: sumarize = generate_summary(text) resumen.append(sumarize) if "Generar Preguntas" in opciones: questions = generate_questions(sumarize) preguntas.append(str(i+1)+"-> "+questions[0]) if "Ver Respuestas" in opciones: respuestas.append(str(i+1)+"-> "+generate_answer(questions[0], sumarize)) if "Facil Lectura" in opciones: simples.append(generate_simple_text(sumarize)[0][0]) resultado += "
Resumen: "+'
'.join(resumen)+"
Texto Simple: "+'
'.join(simples)+"
Preguntas: "+'
'.join(preguntas)+"
Respuestas: "+'
'.join(respuestas)+"