"""Gradio demo: entity recognition, classification and similarity for legal text.

For each of two input texts the app highlights named entities and shows the
labels predicted by two JurisBERT sequence classifiers; it also reports the
cosine similarity between the two texts as a percentage.
"""

from functools import lru_cache

import gradio as gr
from sentence_transformers import SentenceTransformer, util
from transformers import (
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)

from app_data import examples
from app_details import title, description, article

NER_MODEL = "hackathon-pln-es/jurisbert-finetuning-ner"
SISTEMA_UNIVERSAL_MODEL = (
    "hackathon-pln-es/jurisbert-class-tratados-internacionales-sistema-universal"
)
CONV_AMERICANA_MODEL = "hackathon-pln-es/jurisbert-clas-art-convencion-americana-dh"
SIMILARITY_MODEL = "hackathon-pln-es/jurisbert-tsdae-sentence-transformer"


# The loaders below are cached so each model is built once per process
# instead of once per request.
@lru_cache(maxsize=None)
def _token_classifier():
    """Build (once) the token-classification pipeline used for NER."""
    tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, add_prefix_space=True)
    model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
    return pipeline(
        "token-classification",
        aggregation_strategy="simple",
        model=model,
        tokenizer=tokenizer,
    )


@lru_cache(maxsize=None)
def _text_classifier(model_name):
    """Build (once per model name) a text-classification pipeline."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


@lru_cache(maxsize=None)
def _sentence_model():
    """Build (once) the sentence-embedding model used for similarity."""
    return SentenceTransformer(SIMILARITY_MODEL)


def _spans_to_highlights(example, results):
    """Turn entity spans into ``(text, label)`` segments covering ``example``.

    Args:
        example: The original text the offsets refer to.
        results: Sequence of dicts with ``"start"``, ``"end"`` and
            ``"entity_group"`` keys, in order of appearance.

    Returns:
        A list of ``(substring, label)`` tuples; text outside any entity is
        labelled ``None``. With no entities, the whole text is returned as a
        single unlabelled segment (or an empty list for empty text).
    """
    if not results:
        return [(example, None)] if example else []

    output = []
    cursor = 0  # end offset of the last segment emitted so far
    for item in results:
        start, end = item["start"], item["end"]
        # Emit the unlabelled gap between the previous entity (or the start
        # of the text) and the current one.
        if start > cursor:
            output.append((example[cursor:start], None))
        output.append((example[start:end], item["entity_group"]))
        cursor = end

    # Trailing text after the last entity.
    if cursor < len(example):
        output.append((example[cursor:], None))
    return output


def _classify(model_name, example):
    """Run the classifier ``model_name`` on ``example``; return label -> score."""
    results = _text_classifier(model_name)(example)
    return {i["label"]: float(i["score"]) for i in results}


def get_entities(example):
    """Recognize entities in ``example`` for display as highlighted text.

    The text is lower-cased before being passed to the model, while the
    returned substrings are sliced from the original text.
    NOTE(review): this assumes lower-casing does not change the string
    length; otherwise the offsets would drift -- confirm for the expected
    inputs.

    Returns:
        A list of ``(substring, entity_group_or_None)`` tuples.
    """
    results = _token_classifier()(example.lower())
    return _spans_to_highlights(example, results)


def clasifica_sistema_universal(example):
    """Classify ``example`` with the "sistema universal" model.

    Returns:
        A dict mapping each predicted label to its score as a float.
    """
    return _classify(SISTEMA_UNIVERSAL_MODEL, example)


def clasifica_conv_americana(example):
    """Classify ``example`` with the "convencion americana" model.

    Returns:
        A dict mapping each predicted label to its score as a float.
    """
    return _classify(CONV_AMERICANA_MODEL, example)


def similitud(example, example2):
    """Return the cosine similarity of the two texts, scaled by 100."""
    model = _sentence_model()
    # Compute an embedding for each text.
    embeddings1 = model.encode(example, convert_to_tensor=True)
    embeddings2 = model.encode(example2, convert_to_tensor=True)
    # Compute the cosine similarity between the embeddings.
    cosine_scores = util.cos_sim(embeddings1, embeddings2)
    return float(cosine_scores[0]) * 100


def process(example, example2):
    """Run every analysis on both texts.

    Returns:
        A 7-tuple in the order expected by the interface outputs: entities,
        "sistema universal" labels and "convencion americana" labels for the
        first text; the similarity score; then the same three results for
        the second text.
    """
    entidades = get_entities(example)
    class_sistema_universal = clasifica_sistema_universal(example)
    class_conv_americana = clasifica_conv_americana(example)

    score_similitud = similitud(example, example2)

    entidades2 = get_entities(example2)
    class_sistema_universal2 = clasifica_sistema_universal(example2)
    class_conv_americana2 = clasifica_conv_americana(example2)

    return (
        entidades,
        class_sistema_universal,
        class_conv_americana,
        score_similitud,
        entidades2,
        class_sistema_universal2,
        class_conv_americana2,
    )


input_sen = gr.inputs.Textbox(lines=10, label="Texto a analizar:")
input_sen2 = gr.inputs.Textbox(lines=10, label="Texto a comparar:")

# Results for the text to analyze.
output_hgtxt = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl1 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl2 = gr.outputs.Label(label="Clasificación modelo convención americana:")

# Similarity result.
output_txt = gr.outputs.Textbox(
    label="Porcentaje de similitud entre los textos proporcionados:"
)

# Results for the text to compare.
output_hgtxt2 = gr.outputs.HighlightedText(label="Reconocimiento de entidades:")
output_lbl3 = gr.outputs.Label(label="Clasificación modelo sistema universal:")
output_lbl4 = gr.outputs.Label(label="Clasificación modelo convención americana:")

iface = gr.Interface(
    fn=process,
    inputs=[input_sen, input_sen2],
    outputs=[
        output_hgtxt,
        output_lbl1,
        output_lbl2,
        output_txt,
        output_hgtxt2,
        output_lbl3,
        output_lbl4,
    ],
    examples=examples,
    title=title,
    description=description,
    article=article,
)
iface.launch()