# Spaces: Runtime error  (hosting-page status text captured with this file; not part of the program)
import gradio as gr | |
from app_data import examples | |
from app_details import title, description, article | |
from transformers import AutoModelForTokenClassification,AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
from sentence_transformers import SentenceTransformer, util | |
_NER_MODEL_NAME = "hackathon-pln-es/jurisbert-finetuning-ner"
_ner_pipeline = None


def _get_ner_pipeline():
    """Build the token-classification pipeline on first use and reuse it.

    Loading the tokenizer and model is far more expensive than running one
    inference, so the pipeline is kept in a module-level variable instead of
    being rebuilt on every request.
    """
    global _ner_pipeline
    if _ner_pipeline is None:
        tokenizer = AutoTokenizer.from_pretrained(
            _NER_MODEL_NAME, add_prefix_space=True
        )
        model = AutoModelForTokenClassification.from_pretrained(_NER_MODEL_NAME)
        _ner_pipeline = pipeline(
            "token-classification",
            aggregation_strategy="simple",
            model=model,
            tokenizer=tokenizer,
        )
    return _ner_pipeline


def _build_highlight_spans(text, entities):
    """Split *text* into (substring, label) tuples around the given entities.

    Each entity is a mapping with ``start``, ``end`` and ``entity_group``
    keys. Text between entities is emitted with a ``None`` label; empty
    stretches are skipped.
    """
    if not entities:
        # No entity found: keep the text visible as one unlabeled span.
        return [(text, None)] if text else []

    spans = []
    cursor = 0  # end offset of the last entity emitted so far
    for entity in entities:
        start, end = entity["start"], entity["end"]
        if start > cursor:
            spans.append((text[cursor:start], None))
        spans.append((text[start:end], entity["entity_group"]))
        cursor = end
    if cursor < len(text):
        spans.append((text[cursor:], None))
    return spans


def get_entities(example):
    """Run named-entity recognition on *example*.

    Args:
        example: Text to analyse.

    Returns:
        A list of ``(substring, label)`` tuples covering the text in order,
        where ``label`` is the entity group or ``None`` for text outside any
        entity.
    """
    # The model is fed lowercased text, but the returned offsets are used to
    # slice the original string so the output keeps the user's casing.
    # NOTE(review): this assumes lowercasing does not change the string
    # length; confirm for inputs outside plain Spanish text.
    results = _get_ner_pipeline()(example.lower())
    return _build_highlight_spans(example, results)
_SISTEMA_UNIVERSAL_MODEL_NAME = (
    "hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal"
)
_sistema_universal_classifier = None


def _get_sistema_universal_classifier():
    """Build the text-classification pipeline on first use and reuse it."""
    global _sistema_universal_classifier
    if _sistema_universal_classifier is None:
        tokenizer = AutoTokenizer.from_pretrained(_SISTEMA_UNIVERSAL_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(
            _SISTEMA_UNIVERSAL_MODEL_NAME
        )
        _sistema_universal_classifier = pipeline(
            "text-classification", model=model, tokenizer=tokenizer
        )
    return _sistema_universal_classifier


def clasifica_sistema_universal(example):
    """Classify *example* with the "sistema universal" treaty model.

    Args:
        example: Text to classify.

    Returns:
        A dict mapping each label returned by the pipeline to its score as a
        plain ``float``.
    """
    results = _get_sistema_universal_classifier()(example)
    return {result["label"]: float(result["score"]) for result in results}
_CONV_AMERICANA_MODEL_NAME = (
    "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
)
_conv_americana_classifier = None


def _get_conv_americana_classifier():
    """Build the text-classification pipeline on first use and reuse it."""
    global _conv_americana_classifier
    if _conv_americana_classifier is None:
        tokenizer = AutoTokenizer.from_pretrained(_CONV_AMERICANA_MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(
            _CONV_AMERICANA_MODEL_NAME
        )
        _conv_americana_classifier = pipeline(
            "text-classification", model=model, tokenizer=tokenizer
        )
    return _conv_americana_classifier


def clasifica_conv_americana(example):
    """Classify *example* with the American Convention articles model.

    Args:
        example: Text to classify.

    Returns:
        A dict mapping each label returned by the pipeline to its score as a
        plain ``float``.
    """
    results = _get_conv_americana_classifier()(example)
    return {result["label"]: float(result["score"]) for result in results}
_SIMILARITY_MODEL_NAME = "hackathon-pln-es/jurisbert-tsdae-sentence-transformer"
_similarity_model = None


def _get_similarity_model():
    """Load the sentence-embedding model on first use and reuse it."""
    global _similarity_model
    if _similarity_model is None:
        _similarity_model = SentenceTransformer(_SIMILARITY_MODEL_NAME)
    return _similarity_model


def similitud(example, example2):
    """Return the cosine similarity of the two texts, scaled by 100.

    Args:
        example: First text.
        example2: Second text.

    Returns:
        The cosine similarity of the two embeddings multiplied by 100, as a
        ``float``.
    """
    model = _get_similarity_model()
    # Compute one embedding per text.
    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)
    # Compute the cosine similarity between the two embeddings.
    cosine_scores = util.cos_sim(embeddings1, embeddings2)
    return float(cosine_scores[0]) * 100
def process(example, example2):
    """Analyse both texts and measure how similar they are.

    Args:
        example: Text to analyse.
        example2: Text to compare against.

    Returns:
        A 7-tuple: entities, "sistema universal" classification and
        "convención americana" classification for ``example``; the
        similarity score between both texts; then the same three analyses
        for ``example2``.
    """

    def _analyse(text):
        # Same three per-text analyses, in the same order, for either input.
        return (
            get_entities(text),
            clasifica_sistema_universal(text),
            clasifica_conv_americana(text),
        )

    first = _analyse(example)
    score_similitud = similitud(example, example2)
    second = _analyse(example2)
    return (*first, score_similitud, *second)
# Input components. The top-level component classes are used because the
# `gr.inputs` / `gr.outputs` namespaces are deprecated and were removed in
# later Gradio releases.
input_sen = gr.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.Textbox(lines=10, label="Texto a comparar:")

# Results for the text being analysed.
output_hgtxt = gr.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.Label(label="Clasificación modelo convención americana:")

# Similarity result.
output_txt = gr.Textbox(
    label="Porcentaje de similitud entre los textos proporcionados:"
)

# Results for the text being compared.
output_hgtxt2 = gr.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.Label(label="Clasificación modelo convención americana:")

# The order of `outputs` must match the tuple returned by `process`.
iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[
        output_hgtxt,
        output_lbl1,
        output_lbl2,
        output_txt,
        output_hgtxt2,
        output_lbl3,
        output_lbl4,
    ],
    examples=examples,
    title=title,
    description=description,
    article=article,
)
iface.launch()