File size: 4,371 Bytes
f3fe141 5d327fe 738d321 5d327fe 738d321 5d327fe f3fe141 5d327fe 936d180 3d2e947 f3fe141 3d2e947 8df1b3b 3d2e947 f3fe141 20801dd 3d2e947 aa42b04 7a88212 ee74ba4 da98d97 ee74ba4 7a88212 da98d97 7a88212 ee74ba4 2a1ebf7 25d06af 2a1ebf7 53d0b05 2a1ebf7 936d180 aa42b04 7a88212 da98d97 7a88212 2a1ebf7 7a88212 2a1ebf7 7a88212 738d321 7a88212 2a1ebf7 e50d585 da98d97 3ba2d59 e50d585 2a1ebf7 ee74ba4 777c11a d988e7a 7a88212 6daba21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import gradio as gr
from app_data import examples
from app_details import title, description, article
from transformers import AutoModelForTokenClassification,AutoModelForSequenceClassification, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer, util
def get_entities(example, results=None):
    """Run NER over *example* and return (span, label) pairs for HighlightedText.

    Every character of the original text is covered: entity spans carry their
    ``entity_group`` label, and the stretches between entities carry ``None``
    so Gradio renders them unhighlighted.

    Parameters
    ----------
    example : str
        Text to analyze.
    results : list[dict] | None
        Optional precomputed token-classification output (dicts with
        ``start``/``end``/``entity_group`` keys). When ``None`` (the default,
        and the original behavior), the JurisBERT NER pipeline is loaded and
        run on ``example.lower()``.

    Returns
    -------
    list[tuple[str, str | None]]
    """
    if results is None:
        model_name = "hackathon-pln-es/jurisbert-finetuning-ner"
        tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForTokenClassification.from_pretrained(model_name)
        token_classifier = pipeline("token-classification", aggregation_strategy="simple", model=model, tokenizer=tokenizer)
        # The model expects lowercase input; the returned character offsets
        # still index into the original string, which is what we display.
        results = token_classifier(example.lower())

    if not results:
        # Original code returned [] here even for non-empty text, which made
        # HighlightedText render nothing; show the full text unhighlighted.
        return [(example, None)] if example else []

    output = []
    # Text before the first entity.
    if results[0]["start"] > 0:
        output.append((example[0:results[0]["start"]], None))
    for i, item in enumerate(results):
        output.append((example[item["start"]:item["end"]], item["entity_group"]))
        # Gap between this entity and the next, if the spans are not adjacent.
        # (The original kept a stale next_item on the last iteration, which
        # appended a spurious empty ("", None) span.)
        if i + 1 < len(results):
            nxt = results[i + 1]
            if item["end"] != nxt["start"]:
                output.append((example[item["end"]:nxt["start"]], None))
    # Trailing text after the last entity.
    if results[-1]["end"] < len(example):
        output.append((example[results[-1]["end"]:], None))
    return output
def clasifica_sistema_universal(example):
    """Classify *example* against treaties of the UN universal system.

    Loads the JurisBERT sequence-classification checkpoint and returns a
    ``{label: score}`` mapping suitable for a Gradio ``Label`` output.

    Parameters
    ----------
    example : str
        Text to classify.

    Returns
    -------
    dict[str, float]
    """
    tokenizer = AutoTokenizer.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")
    model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal")
    text_classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    results = text_classifier(example)
    # The original also built an unused `salida` list and carried a
    # commented-out alternative return; both removed as dead code.
    return {i["label"]: float(i["score"]) for i in results}
def clasifica_conv_americana(example):
    """Classify *example* against articles of the American Convention on Human Rights.

    Returns a ``{label: score}`` mapping for a Gradio ``Label`` output.
    """
    checkpoint = "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    predictions = classifier(example)
    return {pred["label"]: float(pred["score"]) for pred in predictions}
def similitud(example, example2):
    """Return the cosine similarity between two texts as a percentage (0-100 scale)."""
    encoder = SentenceTransformer("hackathon-pln-es/jurisbert-tsdae-sentence-transformer")
    # Embed each text independently, then compare.
    emb_first = encoder.encode(example, convert_to_tensor=True)
    emb_second = encoder.encode(example2, convert_to_tensor=True)
    cosine = util.cos_sim(emb_first, emb_second)
    return float(cosine[0]) * 100
def process(example, example2):
    """Run the full analysis pipeline on a pair of texts.

    Returns a 4-tuple matching the Gradio outputs: NER highlight spans for
    *example*, its universal-system classification, its American-Convention
    classification, and the similarity score between the two texts.
    """
    return (
        get_entities(example),
        clasifica_sistema_universal(example),
        clasifica_conv_americana(example),
        similitud(example, example2),
    )
# --- Gradio UI wiring ---
input_sen = gr.inputs.Textbox(lines=10, label="Proporcione el texto a analizar:")
input_sen2 = gr.inputs.Textbox(lines=10, label="Proporcione el texto a comparar:")
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo convención americana:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_txt = gr.outputs.Textbox(label="Porcentaje de similitud entre los textos proporcionados:")
# `process` returns (entities, sistema_universal, conv_americana, similitud),
# so the outputs must follow that order. The original listed output_lbl2 twice
# and never used output_lbl1, mislabeling the convención-americana result.
iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[output_hgtxt, output_lbl2, output_lbl1, output_txt],
    examples=examples,
    title=title,
    description=description,
)
iface.launch()
|