# Spaces: Runtime error (hosting-page status text captured along with the file; not part of the program)
# -*- coding: utf-8 -*-
"""ABSTRACTGEN_ES FINAL.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF

# installs
"""
import os

# Python packages installed at start-up. `os` is part of the standard library,
# so there is deliberately no `pip install os.system` step.
_PIP_PACKAGES = (
    'gpt_2_simple',
    'gradio',
    'huggingface_hub',
    'easynmt',
    'sentence-transformers',
)


def _run_setup_command(command):
    """Run a shell command best-effort and report a non-zero exit status.

    Failures are only logged, never raised, so one failing step does not stop
    the remaining set-up steps from running.
    """
    status = os.system(command)
    if status != 0:
        print(f"Setup command exited with status {status}: {command}")
    return status


for _package in _PIP_PACKAGES:
    _run_setup_command(f'pip install {_package}')

# NOTE(review): this pipes a remote script straight into a root shell; kept
# because it is how the git-lfs apt repository gets registered here, but
# consider pinning or vendoring it.
_run_setup_command('curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash')
# -y: there is no terminal to answer a confirmation prompt at start-up.
_run_setup_command('apt-get install -y git-lfs')
_run_setup_command('git lfs install')

# `git clone` refuses to write into an existing non-empty directory, so only
# clone when the checkout is not already present (e.g. after a restart).
if not os.path.isdir('AbstractGeneratorES'):
    _run_setup_command('git clone https://huggingface.co/franz96521/AbstractGeneratorES')

# Log where we are and what is present, to help diagnose path problems.
print(os.getcwd())
print(os.listdir())
# Commented out IPython magic to ensure Python compatibility.
# %cd '/content/AbstractGeneratorES'
"""# Init"""
import os
import random
import re

import gpt_2_simple as gpt2
import gradio as gr
import pandas as pd
import tensorflow as tf
from easynmt import EasyNMT
from sentence_transformers import SentenceTransformer, util

# Base GPT-2 model; fetched once into ./models/<model_name> if it is missing.
model_name = "124M"
if not os.path.isdir(os.path.join("models", model_name)):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)

# Locations inside the cloned AbstractGeneratorES repository. The trailing ''
# component keeps the trailing path separator that the names built by plain
# concatenation below rely on.
path = os.path.join(os.getcwd(), 'AbstractGeneratorES', 'AbstractGenerator', '')
checkpoint_dir = os.path.join(path, 'weights', '')
# NOTE(review): 'TrainigData' spelling is kept as-is; presumably it matches the
# directory name in the cloned repository -- verify before "fixing" it.
data_path = os.path.join(path, 'TrainigData', '')
file_name_en = 'en'
file_path_en = data_path + file_name_en
file_name_es = 'es'
file_path_es = data_path + file_name_es

# Delimiter tokens; `sufix` is passed to gpt2.generate as the truncation marker.
prefix = '<|startoftext|>'
sufix = '<|endoftext|>'
def generateAbstract(text):
    """Generate one abstract for *text* with the fine-tuned GPT-2 checkpoint.

    The prompt is ``str(text)`` followed by a newline and the word
    ``ABSTRACT``. The checkpoint is loaded from ``checkpoint_dir`` under the
    run name ``run1`` on every call.

    Args:
        text: Document body to condition the generation on; converted with
            ``str()`` before use.

    Returns:
        The first (and only requested) sample returned by ``gpt2.generate``,
        which is asked to truncate at ``sufix``.
    """
    # Start from an empty default graph so repeated calls do not pile up
    # variables from previously loaded checkpoints.
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
        samples = gpt2.generate(
            sess,
            prefix=str(text) + "\nABSTRACT",
            return_as_list=True,
            truncate=sufix,
            checkpoint_dir=checkpoint_dir,
            nsamples=1,
        )
        return samples[0]
    finally:
        # A new session is opened per request; release it even when loading
        # or generation fails so sessions do not accumulate.
        sess.close()
def removeAbstract(text):
    """Split *text* into ``(abstract, body)`` at the introduction heading.

    The split point is the first occurrence of ``"Introducción"``; if that
    spelling is absent, the first occurrence of ``"INTRODUCCIÓN"`` is used.
    Everything before the heading is treated as the abstract and the heading
    itself starts the body.

    Args:
        text: Full document text.

    Returns:
        A 2-tuple ``(abstract, body)``. When neither heading is present the
        whole text is returned as the body with an empty abstract, so callers
        can always unpack the result.
    """
    p = text.find("Introducción")
    p2 = text.find("INTRODUCCIÓN")
    # Trace of the two marker offsets (-1 means "not found").
    print(p, p2)
    if p != -1:
        return (text[:p], text[p:])
    if p2 != -1:
        return (text[:p2], text[p2:])
    # No heading found: return a well-formed pair instead of falling off the
    # end with None, which made tuple-unpacking callers raise TypeError.
    return ("", text)
def generated_similarity(type_of_input, cn_text):
    """Generate an abstract for a full paper and compare it with the original.

    English input is first translated to Spanish. The text is then split into
    its original abstract and body, a new abstract is generated from the body,
    and the cosine similarity between the sentence embeddings of both
    abstracts is reported as a percentage.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Full document text, including its original abstract.

    Returns:
        A formatted report string (body, original abstract, generated
        abstract, similarity), or ``None`` when ``type_of_input`` is neither
        of the two supported values.
    """
    if type_of_input == "English":
        tf.compat.v1.reset_default_graph()
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')
        print(cn_text)
    elif type_of_input != "Spanish":
        # Unsupported or unselected language: nothing to produce.
        return None

    split = removeAbstract(cn_text)
    if split is None:
        # No introduction heading was recognised: treat the whole text as the
        # body and compare against an empty original abstract.
        abstract_original, body = "", cn_text
    else:
        abstract_original, body = split

    tf.compat.v1.reset_default_graph()
    generated_Abstract = generateAbstract(body)

    # Compute one embedding per abstract and compare them.
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embedding_1 = model.encode(abstract_original, convert_to_tensor=True)
    embedding_2 = model.encode(generated_Abstract, convert_to_tensor=True)
    # Named `similarity` so the local no longer shadows this function's name.
    similarity = util.pytorch_cos_sim(embedding_1, embedding_2)

    return f'''TEXTO SIN ABSTRACT\n
{body}\n
ABSTRACT ORIGINAL\n
{abstract_original}\n
ABSTRACT GENERADO\n
{generated_Abstract}\n
SIMILARIDAD DE ABSTRACT: {float(round(similarity.item()*100, 3))}%
'''
def generated_abstract(type_of_input, cn_text):
    """Generate an abstract for a text that does not contain one.

    English input is translated to Spanish before generation; Spanish input
    is used as-is.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Document text without an abstract.

    Returns:
        A formatted report string with the (possibly translated) text and the
        generated abstract, or ``None`` when ``type_of_input`` is neither of
        the two supported values.
    """
    if type_of_input not in ("English", "Spanish"):
        # Unsupported or unselected language: nothing to produce.
        return None

    tf.compat.v1.reset_default_graph()
    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    generated_Abstract = generateAbstract(cn_text)
    return f'''TEXTO SIN ABSTRACT\n
{cn_text}\n
ABSTRACT GENERADO\n
{generated_Abstract}\n
'''
# Gradio UI: two tabs sharing the same layout (language selector, input box,
# output box, Run button), each wired to a different handler.
block = gr.Blocks()
with block:
    gr.Markdown('''ABSTRACTGEN_ES''')
    gr.Markdown('''An app that can generate abstracts in Spanish based on the text that you input via document text and if you already have an abstract and need a different idea, check how similar the new abstract is to the original one.
''')
    gr.Markdown(''' We used Blocks (beta), which allows you to build web-based demos in a flexible way using the gradio library. Blocks is a more low-level and flexible alternative to the core Interface class.
The main problem with this library right now is that
it doesn't support some functionality that Interface
class has''')
    gr.Markdown('''To get more info about this project go to: https://sites.google.com/up.edu.mx/somos-pln-abstractgen-es/inicio?authuser=0''')

    # Components are created through the top-level gr.Radio / gr.Textbox
    # classes; the legacy gr.inputs / gr.outputs namespaces are not available
    # in current Gradio releases, and the install step does not pin a version.
    with gr.Tab("Full text and text similarity"):
        gr.Markdown("Choose the language:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Full text", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_similarity, inputs=[type_of_input, cn_text], outputs=[cn_results1])

    # The names below are rebound for the second tab; the click handler above
    # already holds references to the first tab's components.
    with gr.Tab("Only text with no abstract"):
        gr.Markdown("Choose the language:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Text without abstract", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_abstract, inputs=[type_of_input, cn_text], outputs=cn_results1)

block.launch(debug=True)