GPalomeque commited on
Commit
766117c
1 Parent(s): f98e6e4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from app_data import examples
4
+
5
+ from app_details import title, description, article
6
+
7
+ from transformers import AutoModelForTokenClassification,AutoModelForSequenceClassification, AutoTokenizer, pipeline
8
+
9
+ from sentence_transformers import SentenceTransformer, util
10
+
11
+ def get_entities(example):
12
+ model_name = "hackathon-pln-es/jurisbert-finetuning-ner"
13
+ tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
14
+
15
+ model = AutoModelForTokenClassification.from_pretrained(model_name)
16
+ token_classifier = pipeline("token-classification", aggregation_strategy="simple", model=model, tokenizer=tokenizer)
17
+ results = token_classifier(example.lower())
18
+
19
+ output = []
20
+
21
+ i=0
22
+ item = None
23
+ prev_item = None
24
+ next_item = None
25
+ while i < (len(results)):
26
+ item = results[i]
27
+ p=i-1
28
+ n=i+1
29
+
30
+ if p > 0:
31
+ prev_item = results[p]
32
+
33
+
34
+ if n<(len(results)):
35
+ next_item = results[n]
36
+
37
+
38
+ if (i==0):
39
+ if item["start"]>0:
40
+ output.extend([(example[0:item["start"]], None)])
41
+ output.extend([(example[item["start"]:item["end"]], item["entity_group"])])
42
+ if (next_item!=None):
43
+ ##verificar el tramo entre actual y siguiente
44
+ if(item["end"]!=next_item["start"]):
45
+ output.extend([(example[item["end"]:next_item["start"]], None)])
46
+ i=i+1
47
+
48
+ if (item!=None):
49
+ if (item["end"] < len(example)):
50
+ output.extend([(example[item["end"]:len(example)], None)])
51
+
52
+ return output
53
+
54
def clasifica_sistema_universal(example):
    """Classify *example* against universal-system human-rights treaties.

    Loads the jurisbert universal-system sequence classifier and returns
    a {label: score} mapping, the shape Gradio's Label output expects.

    Fix vs. original: the ``salida`` list was built and never used, and a
    commented-out alternative return lingered — both dead code, removed.
    """
    model_name = "hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)
    return {r["label"]: float(r["score"]) for r in results}
69
+
70
def clasifica_conv_americana(example):
    """Classify *example* under the articles of the American Convention
    on Human Rights.

    Returns a {label: score} mapping suitable for a Gradio Label widget.
    """
    checkpoint = "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    clf_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    classifier = pipeline("text-classification", model=clf_model, tokenizer=tok)
    predictions = classifier(example)
    scores = {}
    for pred in predictions:
        scores[pred["label"]] = float(pred["score"])
    return scores
79
+
80
def similitud(example, example2):
    """Return the cosine similarity between the two texts as a percentage.

    Encodes both inputs with the jurisbert TSDAE sentence transformer and
    scores them with cosine similarity, scaled to 0-100.
    """
    encoder = SentenceTransformer("hackathon-pln-es/jurisbert-tsdae-sentence-transformer")
    # Embed each text once.
    emb_a = encoder.encode(example, convert_to_tensor=True)
    emb_b = encoder.encode(example2, convert_to_tensor=True)
    # Cosine similarity of the two embeddings, reported as a percentage.
    score = util.cos_sim(emb_a, emb_b)
    return 100 * float(score[0])
90
+
91
+
92
def process(example, example2):
    """Run the full analysis pipeline over both texts.

    Returns a 7-tuple in the Interface's output order: entities,
    universal-system classification and American-Convention classification
    for the first text; the similarity percentage; then the same three
    analyses for the second text.
    """
    first = (
        get_entities(example),
        clasifica_sistema_universal(example),
        clasifica_conv_americana(example),
    )
    score = similitud(example, example2)
    second = (
        get_entities(example2),
        clasifica_sistema_universal(example2),
        clasifica_conv_americana(example2),
    )
    return first + (score,) + second
107
+
108
# ---- Inputs ----
input_sen = gr.inputs.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.inputs.Textbox(lines=10, label="Texto a comparar:")

# ---- Outputs for the text under analysis ----
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# ---- Similarity output ----
output_txt = gr.outputs.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")

# ---- Outputs for the comparison text ----
output_hgtxt2 = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# BUG FIX: the original outputs list passed output_lbl2 twice and never used
# output_lbl1, so the first text's universal-system classification was rendered
# under the convención-americana label. The list now matches the order of
# process()'s 7-tuple return value.
iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[
        output_hgtxt, output_lbl1, output_lbl2,
        output_txt,
        output_hgtxt2, output_lbl3, output_lbl4,
    ],
    examples=examples,
    title=title,
    description=description,
    article=article,
)

iface.launch()