GPalomeque committed on
Commit
f98e6e4
1 Parent(s): 1d52e47

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -129
app.py DELETED
@@ -1,129 +0,0 @@
1
- import gradio as gr
2
-
3
- from app_data import examples
4
-
5
- from app_details import title, description, article
6
-
7
- from transformers import AutoModelForTokenClassification,AutoModelForSequenceClassification, AutoTokenizer, pipeline
8
-
9
- from sentence_transformers import SentenceTransformer, util
10
-
11
_NER_MODEL_NAME = "hackathon-pln-es/jurisbert-finetuning-ner"
_ner_pipeline = None


def _load_ner_pipeline():
    """Return the token-classification pipeline, building it on first use."""
    global _ner_pipeline
    if _ner_pipeline is None:
        tokenizer = AutoTokenizer.from_pretrained(_NER_MODEL_NAME, add_prefix_space=True)
        model = AutoModelForTokenClassification.from_pretrained(_NER_MODEL_NAME)
        _ner_pipeline = pipeline(
            "token-classification",
            aggregation_strategy="simple",
            model=model,
            tokenizer=tokenizer,
        )
    return _ner_pipeline


def _spans_from_entities(example, entities):
    """Split ``example`` into ``(text, label)`` pairs that cover the whole string."""
    spans = []
    cursor = 0  # index in ``example`` up to which text has already been emitted
    for entity in entities:
        start, end = entity["start"], entity["end"]
        if start > cursor:
            # Unlabelled text between the previous entity (or the start) and this one.
            spans.append((example[cursor:start], None))
        spans.append((example[start:end], entity["entity_group"]))
        cursor = max(cursor, end)
    if cursor < len(example):
        # Unlabelled tail after the last entity; the whole text when nothing was found.
        spans.append((example[cursor:], None))
    return spans


def get_entities(example):
    """Run named-entity recognition on ``example`` and return highlight spans.

    The text is lower-cased before being sent to the model, but the returned
    fragments are sliced from the original ``example`` using the ``start`` and
    ``end`` values of each result.

    Args:
        example: Text to analyse.

    Returns:
        A list of ``(fragment, label)`` tuples in reading order. ``label`` is
        the result's ``entity_group`` for recognised entities and ``None``
        for the text in between. When no entity is found, the whole text is
        returned as a single unlabelled fragment.
    """
    # NOTE(review): assumes the pipeline returns entities in ascending offset
    # order and that lower-casing does not change the string length - confirm.
    entities = _load_ner_pipeline()(example.lower())
    return _spans_from_entities(example, entities)
53
-
54
_SISTEMA_UNIVERSAL_MODEL_NAME = (
    "hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal"
)
_sistema_universal_pipeline = None


def _load_sistema_universal_pipeline():
    """Return the text-classification pipeline, building it on first use."""
    global _sistema_universal_pipeline
    if _sistema_universal_pipeline is None:
        tokenizer = AutoTokenizer.from_pretrained(_SISTEMA_UNIVERSAL_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(_SISTEMA_UNIVERSAL_MODEL_NAME)
        _sistema_universal_pipeline = pipeline(
            "text-classification", model=model, tokenizer=tokenizer
        )
    return _sistema_universal_pipeline


def clasifica_sistema_universal(example):
    """Classify ``example`` with the "sistema universal" treaty model.

    Args:
        example: Text to classify.

    Returns:
        A dict mapping each returned label to its score as a plain ``float``.
    """
    results = _load_sistema_universal_pipeline()(example)
    return {result["label"]: float(result["score"]) for result in results}
69
-
70
_CONV_AMERICANA_MODEL_NAME = "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
_conv_americana_pipeline = None


def _load_conv_americana_pipeline():
    """Return the text-classification pipeline, building it on first use."""
    global _conv_americana_pipeline
    if _conv_americana_pipeline is None:
        tokenizer = AutoTokenizer.from_pretrained(_CONV_AMERICANA_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(_CONV_AMERICANA_MODEL_NAME)
        _conv_americana_pipeline = pipeline(
            "text-classification", model=model, tokenizer=tokenizer
        )
    return _conv_americana_pipeline


def clasifica_conv_americana(example):
    """Classify ``example`` with the American Convention article model.

    Args:
        example: Text to classify.

    Returns:
        A dict mapping each returned label to its score as a plain ``float``.
    """
    results = _load_conv_americana_pipeline()(example)
    return {result["label"]: float(result["score"]) for result in results}
79
-
80
_SIMILARITY_MODEL_NAME = "hackathon-pln-es/jurisbert-tsdae-sentence-transformer"
_similarity_model = None


def _load_similarity_model():
    """Return the sentence-embedding model, loading it on first use."""
    global _similarity_model
    if _similarity_model is None:
        _similarity_model = SentenceTransformer(_SIMILARITY_MODEL_NAME)
    return _similarity_model


def similitud(example, example2):
    """Return the cosine similarity of the two texts as a percentage.

    Args:
        example: First text.
        example2: Second text.

    Returns:
        The cosine similarity between the two embeddings, multiplied by 100.
    """
    model = _load_similarity_model()

    # Embed both texts as tensors so they can be compared directly.
    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)

    cosine_scores = util.cos_sim(embeddings1, embeddings2)

    # NOTE(review): assumes both inputs are single strings, so the first row
    # of the score matrix holds exactly one value - confirm against callers.
    return float(cosine_scores[0]) * 100
90
-
91
-
92
def process(example, example2):
    """Run every analysis on both texts and return the results as one tuple.

    Args:
        example: Text to analyse.
        example2: Text to compare against ``example``.

    Returns:
        A 7-tuple: entities, universal-system classification and American
        Convention classification for ``example``; the similarity score
        between both texts; then the same three analyses for ``example2``.
    """

    def _analyze(text):
        # Entities first, then the two classifiers, for a single text.
        return (
            get_entities(text),
            clasifica_sistema_universal(text),
            clasifica_conv_americana(text),
        )

    first_results = _analyze(example)
    score_similitud = similitud(example, example2)
    second_results = _analyze(example2)

    return (*first_results, score_similitud, *second_results)
107
-
108
# Input components: the text to analyse and the text to compare it against.
input_sen = gr.inputs.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.inputs.Textbox(lines=10, label="Texto a comparar:")

# Results for the text to analyse.
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# Similarity result.
output_txt = gr.outputs.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")

# Results for the text to compare.
output_hgtxt2 = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# The outputs are listed in the same order as the tuple returned by process().
# The second slot is output_lbl1 so the universal-system classification is no
# longer shown under the American Convention label (output_lbl2 was previously
# listed twice and output_lbl1 was never used).
iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[
        output_hgtxt,
        output_lbl1,
        output_lbl2,
        output_txt,
        output_hgtxt2,
        output_lbl3,
        output_lbl4,
    ],
    examples=examples,
    title=title,
    description=description,
    article=article,
)

iface.launch()