# -*- coding: utf-8 -*-
"""ABSTRACTGEN_ES FINAL.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF

# installs
"""

import os

# Dependencies are installed at start-up, so these calls must run before the
# third-party imports further down.
# NOTE: the original also ran `pip install os.system`; `os.system` is a
# standard-library function and nothing in this script uses such a package,
# so that call was dropped.
os.system('pip install gpt_2_simple')
os.system('pip install gradio')
os.system('pip install huggingface_hub')
os.system('pip install easynmt')
os.system('pip install sentence-transformers')
# NOTE(review): pipes a remote script into `sudo bash`; kept as in the
# original, but worth replacing with a pinned/verified install.
os.system('curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash')
os.system('apt-get install git-lfs')
os.system('git lfs install')
os.system('git clone https://huggingface.co/franz96521/AbstractGeneratorES ')

print(os.getcwd())
print(os.listdir())

# Commented out IPython magic to ensure Python compatibility.
# %cd '/content/AbstractGeneratorES'

# Init

import gpt_2_simple as gpt2
import os
import tensorflow as tf
import pandas as pd
import re

model_name = "124M"
if not os.path.isdir(os.path.join("models", model_name)):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)

# Locations inside the repository cloned above.
path = os.getcwd()+'/AbstractGeneratorES/AbstractGenerator/'
checkpoint_dir = path+'weights/'
data_path = path+'TrainigData/'

file_name_en = 'en'
file_path_en = data_path+file_name_en

file_name_es = 'es'
file_path_es = data_path+file_name_es

# Delimiter tokens; `sufix` is passed to gpt2.generate as the truncation marker.
prefix = '<|startoftext|>'
sufix = '<|endoftext|>'

import gradio as gr
import random
from easynmt import EasyNMT
from sentence_transformers import SentenceTransformer, util

# Shown to the user when the input has no introduction heading to split on.
_NO_INTRODUCTION_MESSAGE = (
    "No se encontró la sección 'Introducción' / 'INTRODUCCIÓN' en el texto; "
    "no es posible separar el abstract original."
)


def generateAbstract(text):
    """Generate one abstract for *text* with the `run1` GPT-2 checkpoint.

    The prompt is ``str(text) + "\\nABSTRACT"`` and the sample is truncated at
    the ``sufix`` token.

    Args:
        text: Body of the article (without abstract).

    Returns:
        The first (and only) generated sample, as a string.
    """
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
        samples = gpt2.generate(
            sess,
            prefix=str(text)+"\nABSTRACT",
            return_as_list=True,
            truncate=sufix,
            checkpoint_dir=checkpoint_dir,
            nsamples=1,
        )
        return samples[0]
    finally:
        # A new session is opened on every request; close it so sessions do
        # not accumulate for the lifetime of the server.
        sess.close()


def removeAbstract(text):
    """Split *text* at its introduction heading.

    "Introducción" is looked up first; "INTRODUCCIÓN" is only used when the
    former is absent.

    Args:
        text: Full article text.

    Returns:
        A ``(before_heading, heading_and_after)`` tuple, or ``None`` when
        neither heading occurs in *text*.
    """
    p = text.find("Introducción")
    p2 = text.find("INTRODUCCIÓN")
    print(p, p2)
    if p != -1:
        return (text[:p], text[p:])
    if p2 != -1:
        return (text[:p2], text[p2:])
    return None


def _similarity_percent(first, second):
    """Return the cosine similarity of two texts as a percentage (3 decimals)."""
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embedding_1 = model.encode(first, convert_to_tensor=True)
    embedding_2 = model.encode(second, convert_to_tensor=True)
    score = util.pytorch_cos_sim(embedding_1, embedding_2)
    return float(round(score.item()*100, 3))


def generated_similarity(type_of_input, cn_text):
    """Generate an abstract and compare it with the one found in the text.

    Args:
        type_of_input: "English" or "Spanish". English input is translated to
            Spanish before processing.
        cn_text: Full article text, abstract included.

    Returns:
        A report string with the body, the original abstract, the generated
        abstract and their similarity; an explanatory message when the text
        has no introduction heading; ``None`` for any other *type_of_input*.
    """
    if type_of_input == "English":
        tf.compat.v1.reset_default_graph()
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')
        print(cn_text)
    elif type_of_input != "Spanish":
        # No language selected: same (empty) result as before.
        return None

    parts = removeAbstract(cn_text)
    if parts is None:
        # Previously this path crashed while unpacking None.
        return _NO_INTRODUCTION_MESSAGE
    abstract_original, body = parts

    tf.compat.v1.reset_default_graph()
    generated_Abstract = generateAbstract(body)
    similarity = _similarity_percent(abstract_original, generated_Abstract)

    return (
        f"TEXTO SIN ABSTRACT\n {body}\n "
        f"ABSTRACT ORIGINAL\n {abstract_original}\n "
        f"ABSTRACT GENERADO\n {generated_Abstract}\n "
        f"SIMILARIDAD DE ABSTRACT: {similarity}% "
    )


def generated_abstract(type_of_input, cn_text):
    """Generate an abstract for a text that has none.

    Args:
        type_of_input: "English" or "Spanish". English input is translated to
            Spanish before generation.
        cn_text: Article text without abstract.

    Returns:
        A report string with the (possibly translated) text and the generated
        abstract, or ``None`` for any other *type_of_input*.
    """
    if type_of_input not in ("English", "Spanish"):
        return None

    tf.compat.v1.reset_default_graph()
    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    generated_Abstract = generateAbstract(cn_text)
    return (
        f"TEXTO SIN ABSTRACT\n {cn_text}\n "
        f"ABSTRACT GENERADO\n {generated_Abstract}\n "
    )


# The `gr.inputs` / `gr.outputs` namespaces are deprecated aliases that newer
# Gradio releases no longer ship; since gradio is installed unpinned above,
# the top-level components are used instead.
# NOTE(review): the row/button nesting below was reconstructed from a
# whitespace-collapsed source; confirm the layout.
block = gr.Blocks()

with block:
    # NOTE(review): any markup around the title seems to have been lost in the
    # source; only the visible text is kept. TODO confirm.
    gr.Markdown("\n\nABSTRACTGEN_ES\n\n")

    with gr.Tab("Full text and text similarity"):
        gr.Markdown("Choose language:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Full text", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_similarity, inputs=[type_of_input, cn_text], outputs=[cn_results1])

    with gr.Tab("Only text with no abstract"):
        gr.Markdown("Choose language:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Text without abstract", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_abstract, inputs=[type_of_input, cn_text], outputs=cn_results1)

block.launch(debug = True)