Spaces:

miracFence
/

generator_es_test

Runtime error

App Files Files Community

generator_es_test / app.py

miracFence

Update app.py

47ddf1a over 3 years ago

raw

history blame contribute delete

5.73 kB

	# -*- coding: utf-8 -*-
	"""ABSTRACTGEN_ES FINAL.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF

	# installs
	"""

	import os
	# Runtime provisioning: install the Python dependencies and git-lfs, then
	# fetch the fine-tuned model repository next to this script.
	# Fixes relative to the previous version:
	#   * the pipe in the git-lfs installer line is a real shell pipe (it was
	#     escaped as "\|", which handed "|", "sudo" and "bash" to curl as
	#     arguments instead of piping the script into bash);
	#   * the bogus "pip install os.system" step is gone (os.system is a
	#     standard-library function, not an installable distribution);
	#   * failing commands are reported instead of being silently ignored.
	_SETUP_COMMANDS = [
	    'pip install gpt_2_simple',
	    'pip install gradio',
	    'pip install huggingface_hub',
	    'pip install easynmt',
	    'pip install sentence-transformers',
	    'curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash',
	    'apt-get install git-lfs',
	    'git lfs install',
	]
	for _command in _SETUP_COMMANDS:
	    # Best effort: keep going on failure, but leave a trace in the log.
	    if os.system(_command) != 0:
	        print(f"Setup command failed: {_command}")

	# Cloning into an existing directory fails, so skip it when the checkout
	# is already present (e.g. after a restart of the app).
	if not os.path.isdir('AbstractGeneratorES'):
	    if os.system('git clone https://huggingface.co/franz96521/AbstractGeneratorES') != 0:
	        print("Setup command failed: git clone AbstractGeneratorES")

	# No chdir into the checkout: os.system runs each command in its own
	# subshell, and the paths used later are built from the working directory.
	print(os.getcwd())
	print(os.listdir())
	# Commented out IPython magic to ensure Python compatibility.
	# %cd '/content/AbstractGeneratorES'

	"""# Init"""

	import gpt_2_simple as gpt2
	import os
	import tensorflow as tf
	import pandas as pd
	import re

	# Base GPT-2 checkpoint: download it only when ./models/<model_name> is
	# not already on disk.
	model_name = "124M"
	_base_model_dir = os.path.join("models", model_name)
	if not os.path.isdir(_base_model_dir):
	    print(f"Downloading {model_name} model...")
	    gpt2.download_gpt2(model_name=model_name)

	# Locations inside the AbstractGeneratorES checkout, resolved against the
	# current working directory. Every directory path keeps its trailing slash
	# because the file paths below are built by plain string concatenation.
	path = os.getcwd()+'/AbstractGeneratorES/AbstractGenerator/'
	checkpoint_dir = path+'weights/'
	# NOTE(review): "TrainigData" is kept verbatim; presumably it matches the
	# directory name in the cloned repository -- confirm before renaming.
	data_path = path+'TrainigData/'

	file_name_en = 'en'
	file_path_en = data_path+file_name_en

	file_name_es = 'es'
	file_path_es = data_path+file_name_es

	# Sample delimiters. These used to be written with "\|", which leaves
	# literal backslashes in the strings; `sufix` is passed as the `truncate`
	# marker to gpt2.generate and must equal the delimiter text exactly.
	prefix = '<|startoftext|>'
	sufix = '<|endoftext|>'

	import gradio as gr
	import random
	from easynmt import EasyNMT

	from sentence_transformers import SentenceTransformer, util

	def generateAbstract(text):
	    """Generate one abstract for *text* with the fine-tuned GPT-2 checkpoint.

	    The default TensorFlow graph is reset, a new session is started and the
	    'run1' checkpoint under ``checkpoint_dir`` is loaded on every call. The
	    prompt is ``str(text)`` followed by a newline and the word "ABSTRACT";
	    the sample is truncated at ``sufix``.

	    Args:
	        text: Document body used as the generation prompt.

	    Returns:
	        The first (and only) generated sample, as returned by
	        ``gpt2.generate`` with ``return_as_list=True``.
	    """
	    tf.compat.v1.reset_default_graph()
	    sess = gpt2.start_tf_sess()
	    try:
	        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
	        samples = gpt2.generate(
	            sess,
	            prefix=str(text)+"\nABSTRACT",
	            return_as_list=True,
	            truncate=sufix,
	            checkpoint_dir=checkpoint_dir,
	            nsamples=1,
	        )
	        return samples[0]
	    finally:
	        # A new session is opened per request; close it so its resources
	        # are released instead of accumulating across calls.
	        sess.close()
	def removeAbstract(text):
	    """Split *text* into (abstract, body) at its introduction heading.

	    The text is cut at the first occurrence of "Introducción"; if that
	    spelling is absent, at the first occurrence of "INTRODUCCIÓN". The
	    mixed-case spelling takes precedence even when the upper-case one
	    appears earlier in the text.

	    Args:
	        text: Full document text.

	    Returns:
	        A ``(before, from_heading)`` tuple. When neither heading is present
	        the result is ``("", text)`` -- the whole text is treated as body --
	        so callers can always unpack two values (previously ``None`` was
	        returned implicitly, which broke tuple unpacking in the callers).
	    """
	    for heading in ("Introducción", "INTRODUCCIÓN"):
	        cut = text.find(heading)
	        if cut != -1:
	            return (text[:cut], text[cut:])
	    return ("", text)

	def generated_similarity(type_of_input, cn_text):
	    """Generate an abstract and compare it with the document's own abstract.

	    English input is first translated to Spanish with EasyNMT ('opus-mt').
	    The text is then split by ``removeAbstract`` into the original abstract
	    and the body, a new abstract is generated from the body, and both
	    abstracts are embedded with 'sentence-transformers/all-MiniLM-L6-v2'
	    to compute their cosine similarity.

	    Args:
	        type_of_input: "English" or "Spanish".
	        cn_text: Full document text, abstract included.

	    Returns:
	        A report string with the body, the original abstract, the generated
	        abstract and the similarity as a percentage rounded to 3 decimals;
	        ``None`` when *type_of_input* is neither supported language.
	    """
	    if type_of_input == "English":
	        tf.compat.v1.reset_default_graph()
	        translator = EasyNMT('opus-mt')
	        cn_text = translator.translate(cn_text, target_lang='es')
	        # Log the translated text that the rest of the pipeline works on.
	        print(cn_text)
	    elif type_of_input != "Spanish":
	        return None

	    # Both languages share the same pipeline from here on (the two
	    # branches used to be verbatim copies of each other).
	    parts = removeAbstract(cn_text)
	    if parts is None:
	        # No introduction heading found: treat the whole text as body
	        # instead of failing on tuple unpacking.
	        parts = ("", cn_text)
	    abstract_original, body = parts

	    tf.compat.v1.reset_default_graph()
	    generated_Abstract = generateAbstract(body)

	    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
	    embedding_1 = model.encode(abstract_original, convert_to_tensor=True)
	    embedding_2 = model.encode(generated_Abstract, convert_to_tensor=True)

	    # Named `similarity` so the local no longer shadows this function.
	    similarity = util.pytorch_cos_sim(embedding_1, embedding_2)
	    return f'''TEXTO SIN ABSTRACT\n
	{body}\n
	ABSTRACT ORIGINAL\n
	{abstract_original}\n
	ABSTRACT GENERADO\n
	{generated_Abstract}\n
	SIMILARIDAD DE ABSTRACT: {float(round(similarity.item()*100, 3))}%
	'''
	def generated_abstract(type_of_input, cn_text):
	    """Generate an abstract for a text that has no abstract of its own.

	    English input is translated to Spanish with EasyNMT ('opus-mt') before
	    generation; Spanish input is used as given.

	    Args:
	        type_of_input: "English" or "Spanish".
	        cn_text: Document text without an abstract.

	    Returns:
	        A report string with the (possibly translated) text and the
	        generated abstract; ``None`` for any other *type_of_input*.
	    """
	    if type_of_input == "English":
	        tf.compat.v1.reset_default_graph()
	        translator = EasyNMT('opus-mt')
	        cn_text = translator.translate(cn_text, target_lang='es')
	    elif type_of_input == "Spanish":
	        tf.compat.v1.reset_default_graph()
	    else:
	        return None

	    # Shared tail of both language paths.
	    generated_Abstract = generateAbstract(cn_text)
	    return f'''TEXTO SIN ABSTRACT\n
	{cn_text}\n
	ABSTRACT GENERADO\n
	{generated_Abstract}\n
	'''

	# Gradio UI: two tabs that share the same layout (language selector, input
	# text box, output text box, Run button) and differ only in the callback.
	# Components are created with gr.Radio / gr.Textbox; the legacy
	# gr.inputs.* / gr.outputs.* namespaces used previously are deprecated and
	# are not meant to be used inside gr.Blocks.
	block = gr.Blocks()

	with block:
	    gr.Markdown("<h1>ABSTRACTGEN_ES</h1>")

	    # Tab 1: text that already contains an abstract -> generated abstract
	    # plus similarity against the original one.
	    with gr.Tab("Full text and text similarity"):
	        gr.Markdown("Choose language:")
	        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
	        with gr.Row():
	            cn_text = gr.Textbox(placeholder="Full text", lines=7)
	        with gr.Row():
	            cn_results1 = gr.Textbox(label="Abstract generado")
	        cn_run = gr.Button("Run")
	        cn_run.click(generated_similarity, inputs=[type_of_input, cn_text], outputs=[cn_results1])

	    # Tab 2: text without an abstract -> generated abstract only. The
	    # names are rebound here; the first tab's callback is already wired
	    # to its own component objects.
	    with gr.Tab("Only text with no abstract"):
	        gr.Markdown("Choose language:")
	        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
	        with gr.Row():
	            cn_text = gr.Textbox(placeholder="Text without abstract", lines=7)
	        with gr.Row():
	            cn_results1 = gr.Textbox(label="Abstract generado")
	        cn_run = gr.Button("Run")
	        cn_run.click(generated_abstract, inputs=[type_of_input, cn_text], outputs=cn_results1)

	block.launch(debug = True)