miracFence committed on
Commit
1022069
1 Parent(s): ab2d0c9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -0
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """ABSTRACTGEN_ES FINAL.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF
8
+
9
+ # installs
10
+ """
11
+
12
+ import os
13
# One-time environment bootstrap, kept from the Colab notebook this file was
# exported from. `os.system` does not raise on failure, so every step below is
# best-effort and its exit status is ignored.

# Python packages imported further down in this file. The previous
# `pip install os.system` step was dropped: `os` is part of the standard
# library and no package of that name is a dependency of this file.
_PIP_PACKAGES = (
    'gpt_2_simple',
    'gradio',
    'huggingface_hub',
    'easynmt',
    'sentence-transformers',
)
for _package in _PIP_PACKAGES:
    os.system(f'pip install {_package}')

# git-lfs is installed before cloning the model repository.
# NOTE(review): piping a downloaded script into `sudo bash` executes remote
# code with root privileges -- confirm this is acceptable for the deployment.
os.system('curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash')
os.system('apt-get install git-lfs')
os.system('git lfs install')

# Clone the model repository only once; `git clone` refuses to write into an
# existing checkout anyway, so re-running it would only produce an error.
if not os.path.isdir('AbstractGeneratorES'):
    os.system('git clone https://huggingface.co/franz96521/AbstractGeneratorES')

# The working directory is deliberately left unchanged (the notebook's `%cd`
# magic is not reproduced); paths below are built from os.getcwd() instead.
print(os.getcwd())
print(os.listdir())
26
+ # Commented out IPython magic to ensure Python compatibility.
27
+ # %cd '/content/AbstractGeneratorES'
28
+
29
+ """# Init"""
30
+
31
+ import gpt_2_simple as gpt2
32
+ import os
33
+ import tensorflow as tf
34
+ import pandas as pd
35
+ import re
36
+
37
# Fetch the base GPT-2 model through gpt_2_simple unless a local copy is
# already present under ./models/<model_name>.
model_name = "124M"
_base_model_dir = os.path.join("models", model_name)
if not os.path.isdir(_base_model_dir):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)
41
+
42
# Locations inside the cloned AbstractGeneratorES repository, all expressed
# relative to the current working directory and all ending in '/'.
path = f"{os.getcwd()}/AbstractGeneratorES/AbstractGenerator/"
checkpoint_dir = f"{path}weights/"
# NOTE(review): 'TrainigData' is spelled as in the original; presumably it
# matches the directory name in the cloned repository -- verify before fixing.
data_path = f"{path}TrainigData/"

# Per-language data file names and their full paths.
file_name_en = 'en'
file_name_es = 'es'
file_path_en = f"{data_path}{file_name_en}"
file_path_es = f"{data_path}{file_name_es}"

# Document delimiter tokens; `sufix` is the truncation marker used when
# generating text.
prefix = '<|startoftext|>'
sufix = '<|endoftext|>'
57
+
58
+ import gradio as gr
59
+ import random
60
+ from easynmt import EasyNMT
61
+
62
+ from sentence_transformers import SentenceTransformer, util
63
+
64
def generateAbstract(text):
    """Generate an abstract for *text* with the fine-tuned GPT-2 checkpoint.

    Every call resets the default TensorFlow graph, opens a new session and
    loads run ``run1`` from ``checkpoint_dir`` into it. The prompt is
    ``str(text)`` followed by a newline and the literal ``ABSTRACT``.

    Args:
        text: Document body used as the prompt; converted with ``str()``.

    Returns:
        The single generated sample (``nsamples=1``), produced with
        ``truncate=sufix``.
    """
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
        samples = gpt2.generate(
            sess,
            prefix=str(text) + "\nABSTRACT",
            return_as_list=True,
            truncate=sufix,
            checkpoint_dir=checkpoint_dir,
            nsamples=1,
        )
        return samples[0]
    finally:
        # A new session is opened per request; close it so its resources are
        # released instead of accumulating across calls.
        sess.close()
70
def removeAbstract(text):
    """Split *text* into (abstract, body) at the introduction heading.

    The split point is the first occurrence of ``"Introducción"``; only if
    that spelling is absent is ``"INTRODUCCIÓN"`` tried. The heading itself
    stays at the start of the body.

    Args:
        text: Full document text, abstract first.

    Returns:
        A ``(before_heading, from_heading_onwards)`` tuple. When neither
        heading is present the whole text is treated as body and the abstract
        is empty: ``("", text)``. A tuple is always returned so callers can
        unpack the result safely.
    """
    # Order matters: the capitalised spelling takes precedence over the
    # upper-case one, regardless of which appears first in the text.
    for heading in ("Introducción", "INTRODUCCIÓN"):
        position = text.find(heading)
        if position != -1:
            return (text[:position], text[position:])
    return ("", text)
78
+
79
def generated_similarity(type_of_input, cn_text):
    """Generate an abstract for a full document and compare it to the original.

    The document is split with ``removeAbstract`` into its original abstract
    and its body, a new abstract is generated from the body with
    ``generateAbstract``, and both abstracts are embedded with a
    SentenceTransformer model to compute their cosine similarity.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``. English input is first
            translated to Spanish with EasyNMT.
        cn_text: Full document text (abstract followed by the body).

    Returns:
        A report string containing the body, the original abstract, the
        generated abstract and the similarity as a percentage rounded to
        three decimals; ``None`` when ``type_of_input`` is neither of the two
        supported values.
    """
    if type_of_input == "English":
        # Translate before splitting: removeAbstract looks for Spanish
        # headings.
        tf.compat.v1.reset_default_graph()
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')
        print(cn_text)
    elif type_of_input != "Spanish":
        # No (or an unknown) language selected: produce no output.
        return None

    # Guard against a None result (no introduction heading found) so the
    # unpacking cannot fail; the whole text is then used as the body.
    abstract_original, body = removeAbstract(cn_text) or ("", cn_text)

    tf.compat.v1.reset_default_graph()
    generated_Abstract = generateAbstract(body)

    # Embed both abstracts and compare them.
    encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embedding_1 = encoder.encode(abstract_original, convert_to_tensor=True)
    embedding_2 = encoder.encode(generated_Abstract, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(embedding_1, embedding_2)
    percentage = round(similarity.item() * 100, 3)

    return (
        "TEXTO SIN ABSTRACT\n\n"
        f"{body}\n\n"
        "ABSTRACT ORIGINAL\n\n"
        f"{abstract_original}\n\n"
        "ABSTRACT GENERADO\n\n"
        f"{generated_Abstract}\n\n"
        f"SIMILARIDAD DE ABSTRACT: {percentage}%\n"
    )
133
def generated_abstract(type_of_input, cn_text):
    """Generate an abstract for a text that has no abstract of its own.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``. English input is
            translated to Spanish with EasyNMT before generation.
        cn_text: Document text without an abstract.

    Returns:
        A report string with the (possibly translated) input text followed by
        the generated abstract; ``None`` for any other ``type_of_input``.
    """
    if type_of_input not in ("English", "Spanish"):
        return None

    # Both supported languages start from a clean default graph.
    tf.compat.v1.reset_default_graph()
    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    produced = generateAbstract(cn_text)
    return (
        "TEXTO SIN ABSTRACT\n\n"
        f"{cn_text}\n\n"
        "ABSTRACT GENERADO\n\n"
        f"{produced}\n\n"
    )
152
+
153
# Gradio UI: an introduction followed by two tabs. The first tab runs
# generated_similarity on a full document; the second runs generated_abstract
# on a text that has no abstract.
block = gr.Blocks(theme="dark")

with block:
    # Title and project description.
    gr.Markdown("ABSTRACTGEN_ES")
    gr.Markdown(
        "An app that can generate abstracts in Spanish based on the text that "
        "you input via document text and if you already have an abstract and "
        "need a different idea, check how similar the new abstract is to the "
        "original one.\n"
    )
    gr.Markdown(
        " We used Blocks (beta), which allows you to build web-based demos in "
        "a flexible way using the gradio library. Blocks is a more low-level "
        "and flexible alternative to the core Interface class.\n"
        "The main problem with this library right now is that\n"
        "it doesn't support some functionality that Interface\n"
        "class has"
    )
    gr.Markdown(
        "To get more info about this project go to: "
        "https://sites.google.com/up.edu.mx/somos-pln-abstractgen-es/inicio?authuser=0"
    )

    # Tab 1: full document in, report with similarity score out.
    with gr.Tab("Full text and text similarity"):
        gr.Markdown("Choose the language:")
        similarity_language = gr.inputs.Radio(
            ["English", "Spanish"], label="Input Language"
        )
        with gr.Row():
            similarity_text = gr.inputs.Textbox(placeholder="Full text", lines=7)
        with gr.Row():
            similarity_output = gr.outputs.Textbox(label="Abstract generado")
        similarity_run = gr.Button("Run")
        similarity_run.click(
            generated_similarity,
            inputs=[similarity_language, similarity_text],
            outputs=[similarity_output],
        )

    # Tab 2: text without abstract in, generated abstract out.
    with gr.Tab("Only text with no abstract"):
        gr.Markdown("Choose the language:")
        abstract_language = gr.inputs.Radio(
            ["English", "Spanish"], label="Input Language"
        )
        with gr.Row():
            abstract_text = gr.inputs.Textbox(
                placeholder="Text without abstract", lines=7
            )
        with gr.Row():
            abstract_output = gr.outputs.Textbox(label="Abstract generado")
        abstract_run = gr.Button("Run")
        abstract_run.click(
            generated_abstract,
            inputs=[abstract_language, abstract_text],
            outputs=abstract_output,
        )

block.launch(debug=True)