Commit
·
f98e6e4
1
Parent(s):
1d52e47
Delete app.py
Browse files
app.py
DELETED
@@ -1,129 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
|
3 |
-
from app_data import examples
|
4 |
-
|
5 |
-
from app_details import title, description, article
|
6 |
-
|
7 |
-
from transformers import AutoModelForTokenClassification,AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
8 |
-
|
9 |
-
from sentence_transformers import SentenceTransformer, util
|
10 |
-
|
11 |
-
_NER_MODEL_NAME = "hackathon-pln-es/jurisbert-finetuning-ner"
# Holds the token-classification pipeline once it has been built, so the
# tokenizer and model are not re-instantiated on every request.
_NER_PIPELINE_CACHE = {}


def _load_ner_pipeline():
    """Return the NER pipeline for _NER_MODEL_NAME, building it on first use."""
    if "ner" not in _NER_PIPELINE_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(_NER_MODEL_NAME, add_prefix_space=True)
        model = AutoModelForTokenClassification.from_pretrained(_NER_MODEL_NAME)
        _NER_PIPELINE_CACHE["ner"] = pipeline(
            "token-classification",
            aggregation_strategy="simple",
            model=model,
            tokenizer=tokenizer,
        )
    return _NER_PIPELINE_CACHE["ner"]


def get_entities(example, token_classifier=None):
    """Split ``example`` into ``(text, label)`` segments for highlighted display.

    The classifier is run on the lower-cased text, and the ``start``/``end``
    offsets it reports are then used to slice the original ``example``.
    Text covered by an entity is labelled with its ``entity_group``; text
    before, between and after entities is labelled ``None``.

    Args:
        example: The text to analyse.
        token_classifier: Optional callable that takes a string and returns a
            list of dicts with ``start``, ``end`` and ``entity_group`` keys.
            When omitted, the cached pipeline for ``_NER_MODEL_NAME`` is used.

    Returns:
        A list of ``(text, label)`` tuples. No empty segments are emitted; a
        non-empty text without entities yields a single ``(example, None)``
        segment and an empty text yields ``[]``.
    """
    if token_classifier is None:
        token_classifier = _load_ner_pipeline()

    # NOTE(review): offsets are computed on example.lower() but applied to
    # example; this assumes lower-casing does not change the string length.
    results = token_classifier(example.lower())

    output = []
    previous_end = 0
    for item in results:
        start, end = item["start"], item["end"]
        # Unlabelled stretch between the previous entity (or the start of the
        # text) and this one. Skipped when entities touch or overlap, so no
        # empty segment is produced.
        if start > previous_end:
            output.append((example[previous_end:start], None))
        output.append((example[start:end], item["entity_group"]))
        previous_end = end

    # Whatever follows the last entity (or the whole text if none was found).
    if previous_end < len(example):
        output.append((example[previous_end:], None))

    return output
|
53 |
-
|
54 |
-
_SISTEMA_UNIVERSAL_MODEL = "hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal"
_CONV_AMERICANA_MODEL = "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
# Text-classification pipelines keyed by model name, so each model is only
# instantiated once instead of on every call.
_TEXT_CLASSIFIER_CACHE = {}


def _load_text_classifier(model_name):
    """Return the text-classification pipeline for ``model_name``, cached after first use."""
    if model_name not in _TEXT_CLASSIFIER_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        _TEXT_CLASSIFIER_CACHE[model_name] = pipeline(
            "text-classification", model=model, tokenizer=tokenizer
        )
    return _TEXT_CLASSIFIER_CACHE[model_name]


def _label_scores(example, model_name, text_classifier=None):
    """Classify ``example`` and map every returned label to its score as a float."""
    if text_classifier is None:
        text_classifier = _load_text_classifier(model_name)
    return {result["label"]: float(result["score"]) for result in text_classifier(example)}


def clasifica_sistema_universal(example, text_classifier=None):
    """Classify ``example`` with the model named by ``_SISTEMA_UNIVERSAL_MODEL``.

    Args:
        example: The text to classify.
        text_classifier: Optional callable that takes the text and returns a
            list of dicts with ``label`` and ``score`` keys. When omitted, the
            cached pipeline for the model is used.

    Returns:
        A dict mapping each returned label to its score (``float``).
    """
    return _label_scores(example, _SISTEMA_UNIVERSAL_MODEL, text_classifier)


def clasifica_conv_americana(example, text_classifier=None):
    """Classify ``example`` with the model named by ``_CONV_AMERICANA_MODEL``.

    Args:
        example: The text to classify.
        text_classifier: Optional callable that takes the text and returns a
            list of dicts with ``label`` and ``score`` keys. When omitted, the
            cached pipeline for the model is used.

    Returns:
        A dict mapping each returned label to its score (``float``).
    """
    return _label_scores(example, _CONV_AMERICANA_MODEL, text_classifier)
|
79 |
-
|
80 |
-
_SIMILARITY_MODEL_NAME = "hackathon-pln-es/jurisbert-tsdae-sentence-transformer"
# Holds the SentenceTransformer once loaded so it is not rebuilt on every call.
_SIMILARITY_MODEL_CACHE = {}


def _load_similarity_model():
    """Return the SentenceTransformer for _SIMILARITY_MODEL_NAME, loading it on first use."""
    if "model" not in _SIMILARITY_MODEL_CACHE:
        _SIMILARITY_MODEL_CACHE["model"] = SentenceTransformer(_SIMILARITY_MODEL_NAME)
    return _SIMILARITY_MODEL_CACHE["model"]


def similitud(example, example2):
    """Return the cosine similarity between two texts, multiplied by 100.

    Args:
        example: First text.
        example2: Second text.

    Returns:
        ``float``: the cosine score of the two embeddings times 100.
    """
    model = _load_similarity_model()

    # Compute the embedding of each text.
    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)

    # Compute the cosine similarity between the two embeddings.
    cosine_scores = util.cos_sim(embeddings1, embeddings2)

    # NOTE(review): float() needs the first row to hold a single value, which
    # presumably is the case when both inputs are single strings - confirm.
    return float(cosine_scores[0]) * 100
|
90 |
-
|
91 |
-
|
92 |
-
def process(example, example2):
    """Analyse both texts and score how similar they are.

    Each text goes through entity recognition and both classifiers; the
    similarity score is computed between the two analyses.

    Returns:
        A 7-tuple: entities, "sistema universal" classification and
        "convención americana" classification of ``example``; the similarity
        score; then the same three results for ``example2``.
    """
    def analiza(texto):
        # Entities first, then the two classifiers, for a single text.
        return (
            get_entities(texto),
            clasifica_sistema_universal(texto),
            clasifica_conv_americana(texto),
        )

    resultados_texto = analiza(example)
    score_similitud = similitud(example, example2)
    resultados_comparado = analiza(example2)

    return (*resultados_texto, score_similitud, *resultados_comparado)
|
107 |
-
|
108 |
-
# Inputs: the text to analyse and the text it is compared against.
input_sen = gr.inputs.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.inputs.Textbox(lines=10, label="Texto a comparar:")

# Results for the analysed text.
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# Similarity result.
output_txt = gr.outputs.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")

# Results for the text being compared.
output_hgtxt2 = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# The outputs must follow the order of the tuple returned by process():
# entities, sistema universal, convención americana, similarity, and then the
# same three results for the second text. output_lbl1 takes the second slot
# so the "sistema universal" scores appear under their own label.
iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[
        output_hgtxt,
        output_lbl1,
        output_lbl2,
        output_txt,
        output_hgtxt2,
        output_lbl3,
        output_lbl4,
    ],
    examples=examples,
    title=title,
    description=description,
    article=article,
)

iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|