GPalomeque committed
Commit 766117c
Parent(s): f98e6e4
Upload app.py
app.py
ADDED
@@ -0,0 +1,129 @@
import gradio as gr

from app_data import examples

from app_details import title, description, article

from transformers import AutoModelForTokenClassification, AutoModelForSequenceClassification, AutoTokenizer, pipeline

from sentence_transformers import SentenceTransformer, util


def get_entities(example):
    # Run NER over the text and return (span, label) tuples covering the whole
    # input, in the format expected by Gradio's HighlightedText output.
    model_name = "hackathon-pln-es/jurisbert-finetuning-ner"
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)

    model = AutoModelForTokenClassification.from_pretrained(model_name)
    token_classifier = pipeline("token-classification", aggregation_strategy="simple", model=model, tokenizer=tokenizer)
    results = token_classifier(example.lower())

    output = []

    i = 0
    item = None
    prev_item = None  # tracked but not used below
    next_item = None
    while i < len(results):
        item = results[i]
        p = i - 1
        n = i + 1

        if p > 0:
            prev_item = results[p]

        if n < len(results):
            next_item = results[n]

        # Unlabeled text before the first entity.
        if i == 0:
            if item["start"] > 0:
                output.extend([(example[0:item["start"]], None)])
        # The entity itself.
        output.extend([(example[item["start"]:item["end"]], item["entity_group"])])
        # Check the gap between the current and the next entity.
        if next_item is not None:
            if item["end"] != next_item["start"]:
                output.extend([(example[item["end"]:next_item["start"]], None)])
        i = i + 1

    # Unlabeled text after the last entity.
    if item is not None:
        if item["end"] < len(example):
            output.extend([(example[item["end"]:len(example)], None)])

    return output


def clasifica_sistema_universal(example):
    # Classify the text with the universal-system treaties model and return
    # {label: score} for a Gradio Label output.
    tokenizer = AutoTokenizer.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")

    model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)

    salida = []  # kept from the original version; not used below
    for i in results:
        salida.append({i["label"]: i["score"]})

    # return results[0]["label"], round(results[0]["score"], 5)

    return {i["label"]: float(i["score"]) for i in results}


def clasifica_conv_americana(example):
    # Classify the text with the American Convention articles model and return
    # {label: score} for a Gradio Label output.
    tokenizer = AutoTokenizer.from_pretrained("hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh")

    model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh")

    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)

    return {i["label"]: float(i["score"]) for i in results}


def similitud(example, example2):
    # Return the cosine similarity between the two texts as a percentage.
    model = SentenceTransformer("hackathon-pln-es/jurisbert-tsdae-sentence-transformer")
    # Compute an embedding for each text.
    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)

    # Compute the cosine similarity.
    cosine_scores = util.cos_sim(embeddings1, embeddings2)

    return float(cosine_scores[0]) * 100


def process(example, example2):
    # Run every model on both texts and collect the outputs for the interface.
    entidades = get_entities(example)

    class_sistema_universal = clasifica_sistema_universal(example)

    class_conv_americana = clasifica_conv_americana(example)

    score_similitud = similitud(example, example2)

    entidades2 = get_entities(example2)

    class_sistema_universal2 = clasifica_sistema_universal(example2)

    class_conv_americana2 = clasifica_conv_americana(example2)
    return entidades, class_sistema_universal, class_conv_americana, score_similitud, entidades2, class_sistema_universal2, class_conv_americana2


input_sen = gr.inputs.Textbox(lines=10, label="Texto a analizar:")

input_sen2 = gr.inputs.Textbox(lines=10, label="Texto a comparar:")

#### Results for the text to analyze:
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo convención americana:")

#### Similarity between the two texts:
output_txt = gr.outputs.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")

#### Results for the text to compare:
output_hgtxt2 = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# iface = gr.Interface(fn=process, inputs=input_sen, outputs=["highlight", "label", "label"], examples=examples, title=title, description=description)

iface = gr.Interface(fn=process, inputs=[input_sen, input_sen2], outputs=[output_hgtxt, output_lbl1, output_lbl2, output_txt, output_hgtxt2, output_lbl3, output_lbl4], examples=examples, title=title, description=description, article=article)

iface.launch()
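As written, each handler above downloads and instantiates its model on every call. A minimal sketch of an alternative, assuming the same model IDs and the `transformers` pipeline API already used in app.py, is to build each pipeline once at module level and reuse it inside the handlers; the `_NER_MODEL` and `_ner_*` names below are only illustrative.

    from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

    # Load the NER pipeline once at import time instead of on every request.
    _NER_MODEL = "hackathon-pln-es/jurisbert-finetuning-ner"
    _ner_tokenizer = AutoTokenizer.from_pretrained(_NER_MODEL, add_prefix_space=True)
    _ner_model = AutoModelForTokenClassification.from_pretrained(_NER_MODEL)
    _ner_pipeline = pipeline(
        "token-classification",
        model=_ner_model,
        tokenizer=_ner_tokenizer,
        aggregation_strategy="simple",
    )

    def get_entities(example):
        # The same span/label post-processing as above would follow here; only
        # the model loading has moved out of the per-request path.
        results = _ner_pipeline(example.lower())
        ...

The same pattern applies to the two text-classification pipelines and the SentenceTransformer model.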