import gradio as gr
from transformers import pipeline
import requests
import json
from transformers import AutoTokenizer, AutoModelForTokenClassification
from nltk.tokenize import sent_tokenize

# Model setup: a BERT NER pipeline and a DistilBERT emotion classifier.
# NOTE(review): `return_all_scores=True` is deprecated in newer transformers
# releases (`top_k=None` is the replacement) — kept as-is for compatibility.
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
classifier = pipeline("text-classification", model='bhadresh-savani/distilbert-base-uncased-emotion', return_all_scores=True)


def detect_emotion(lista, emotion_threshold, minimum_words):
    """Score every sentence for emotions and keep the strong hits.

    Args:
        lista: list of sentence strings.
        emotion_threshold: minimum classifier score for a hit.
        minimum_words: a sentence must have MORE than this many
            whitespace-separated words to qualify.

    Returns:
        List of (sentence_index, sentence_text, emotion_label, score) tuples.
        A sentence can appear multiple times, once per qualifying emotion.
    """
    prediction = classifier(lista)
    hits = []
    for i in range(len(prediction)):
        for k in prediction[i]:
            if k["score"] > emotion_threshold and len(lista[i].split()) > minimum_words:
                hits.append((i, k["label"], k["score"]))
    # Attach the sentence text to each hit.
    listc = []
    for idx, label, score in hits:
        listc.append((idx, lista[idx], label, score))
    return listc


def detect_entity3(lista):
    """Run NER over each sentence and merge adjacent word-piece tokens.

    Consecutive pipeline tokens whose character spans touch (next token's
    "start" equals the running "end") are glued back into one entity word,
    stripping the "#" word-piece markers and keeping the best sub-token score.

    NOTE(review): transformers' own `aggregation_strategy="simple"` performs
    this merging natively; the hand-rolled version is kept to preserve the
    exact original output format.

    Args:
        lista: list of sentence strings.

    Returns:
        List of (sentence_index, sentence_text, merged_word_score,
        entity_tag, start_offset) tuples.
    """
    ner_results = nlp(lista)
    entity = []
    for i in range(len(ner_results)):
        end = -10          # sentinel: no span can start at -10
        old = None         # entity currently being assembled
        for j in range(len(ner_results[i])):
            if int(ner_results[i][j]["start"]) == int(end):
                # Token continues the current entity (spans are adjacent).
                if j == len(ner_results[i]) - 1:
                    appen = ner_results[i][j]["word"].replace("#", "")
                    old["word"] = old["word"] + appen
                    if old["score"] < ner_results[i][j]["score"]:
                        old["score"] = ner_results[i][j]["score"]
                    # Last token of the sentence: flush the merged entity.
                    val = (i, old["word"], old["score"], ner_results[i][j]['entity'], old["start"])  # can do j-1 also here
                    entity.append(val)
                else:
                    appen = ner_results[i][j]["word"].replace("#", "")
                    old["word"] = old["word"] + appen
                    if old["score"] < ner_results[i][j]["score"]:
                        old["score"] = ner_results[i][j]["score"]
                    end = ner_results[i][j]["end"]
            else:
                if old is not None:
                    # Gap found: the previous entity is complete — flush it,
                    # then start a new one at the current token.
                    if j == len(ner_results[i]) - 1:
                        val = (i, old["word"], old["score"], ner_results[i][j - 1]['entity'], old["start"])
                        entity.append(val)
                        old["word"] = ner_results[i][j]["word"]
                        old["score"] = ner_results[i][j]["score"]
                        old["start"] = ner_results[i][j]["start"]
                        # Last token: the new single-token entity is also final.
                        val = (i, old["word"], old["score"], ner_results[i][j]['entity'], old["start"])
                        entity.append(val)
                    else:
                        val = (i, old["word"], old["score"], ner_results[i][j - 1]['entity'], old["start"])
                        entity.append(val)
                        old["word"] = ner_results[i][j]["word"]
                        old["score"] = ner_results[i][j]["score"]
                        old["start"] = ner_results[i][j]["start"]
                        end = ner_results[i][j]["end"]
                else:
                    # First token of the sentence: open a new entity.
                    old = {}
                    old["word"] = ner_results[i][j]["word"]
                    old["score"] = ner_results[i][j]["score"]
                    old["start"] = ner_results[i][j]["start"]
                    end = ner_results[i][j]["end"]
    # Attach the sentence text to each merged entity.
    listc = []
    for e in entity:
        listc.append((e[0], lista[e[0]], e[1], e[2], e[4]))
    return listc


def compare_and_print(output1, output2):
    """Return sentence indices present in BOTH result lists.

    Args:
        output1: emotion hits — tuples whose element 0 is a sentence index.
        output2: entity hits — tuples whose element 0 is a sentence index.

    Returns:
        List of sentence indices that appear in output1 and output2.
    """
    dicta = {}
    for i in output1:
        dicta[i[0]] = "No"
    for i in output2:
        if i[0] in dicta:
            dicta[i[0]] = "Yes"
    both = []
    for i in dicta:
        if dicta[i] == "Yes":
            both.append(i)
    return both


def detect_tam(entity_output, tam_list):
    """Map sentence indices to the TAM phrases they contain.

    A TAM phrase matches when its first word equals a detected entity word
    (case-insensitive) and the full phrase appears verbatim in the sentence
    at the entity's start offset.

    Args:
        entity_output: tuples from detect_entity3 —
            (sentence_index, sentence_text, entity_word, tag, start_offset).
        tam_list: list of multi-word phrase strings to search for.

    Returns:
        Dict mapping sentence_index -> list of matching tam_list indices
        (each phrase index recorded at most once per sentence).
    """
    dicta = {}
    for i in entity_output:
        for j in range(len(tam_list)):
            comp = tam_list[j].split()
            if i[2].lower() == comp[0].lower():
                # Compare the sentence slice at the entity offset against
                # the whole phrase, case-insensitively.
                if i[1][i[4]:i[4] + len(tam_list[j])].lower() == tam_list[j].lower():
                    if i[0] not in dicta:
                        dicta[i[0]] = []
                        dicta[i[0]].append(j)
                    else:
                        if j in dicta[i[0]]:
                            pass  # phrase already recorded for this sentence
                        else:
                            dicta[i[0]].append(j)
    return dicta


def myFunc(e):
    """Sort key: the score stored at position 1 of a (index, score) tuple."""
    return e[1]


def integrate_all(text, threshold, min_words, max_detection, max_tam_detection):
    """Build a text report of emotion- and entity-selected sentence windows.

    Args:
        text: raw document; split into sentences with nltk.sent_tokenize.
        threshold: emotion score cutoff passed to detect_emotion.
        min_words: minimum sentence word count passed to detect_emotion.
        max_detection: max selections per emotion label.
        max_tam_detection: max selections from the emotion+entity overlap.

    Returns:
        The accumulated report string.
    """
    out_text = ""
    emotion_threshold = threshold
    minimum_words = min_words
    emotion_number = int(max_detection)   # e.g. 3
    both_number = int(max_tam_detection)  # e.g. 5
    # tam_number = 4
    lista = sent_tokenize(text)
    emotion_out = detect_emotion(lista, emotion_threshold, minimum_words)
    out_text = out_text + "##Selected based on emotions##"
    out_text = out_text + "\n---------------------------"
    # Group hits by emotion label: label -> [(sentence_index, score), ...],
    # then sort each group by descending score.
    dicta = {}
    for i in emotion_out:
        if i[2] not in dicta:
            dicta[i[2]] = []
            dicta[i[2]].append((i[0], i[3]))
        else:
            dicta[i[2]].append((i[0], i[3]))
    for i in dicta:
        dicta[i].sort(reverse=True, key=myFunc)
    # NOTE(review): the recovered source was garbled here
    # (`if len(dicta[i])=i[0]-2+8+1:` — dropped text, a SyntaxError).
    # Reconstructed by analogy with the `both_selected` block below: take up
    # to `emotion_number` top-scored sentences per emotion. Confirm against
    # version history.
    emotion_selected = []
    for label in dicta:
        take = min(len(dicta[label]), emotion_number)
        for j in range(take):
            emotion_selected.append(dicta[label][j])
    for i in emotion_selected:
        # Emit a window of up to 8 sentences starting 2 before the hit.
        # NOTE(review): mirrors the window math of the both_selected loop;
        # i[0]-2 can go negative and the else-branch length looks suspect —
        # preserved for consistency, flagging for a follow-up fix.
        if len(lista) >= i[0] - 2 + 8 + 1:
            sel_val = 8
        else:
            sel_val = len(lista) - 1 - (i[0] - 2 + 8)
        for j in range(sel_val):
            out_text = out_text + "\n" + str(-2 + j) + ", " + str(lista[i[0] - 2 + j])
        out_text = out_text + "\n---------------------------"
    out_text = out_text + "\n---------------------------"
    out_text = out_text + "\n##Selected based on presence of both named entity then followed by highest emotions##"
    out_text = out_text + "\n---------------------------"
    entity_out = detect_entity3(lista)
    both = compare_and_print(emotion_out, entity_out)
    both_selected = []
    if len(both) <= both_number:
        # Few enough overlaps: keep them all (score slot unused -> None).
        for i in both:
            both_selected.append((i, None))
    else:
        # Too many: rank overlapping sentences by emotion score, keep top N.
        list_em = []
        for i in both:
            for j in emotion_out:
                if i == j[0]:
                    list_em.append((i, j[3]))
        list_em.sort(reverse=True, key=myFunc)
        for i in range(both_number):
            both_selected.append(list_em[i])
    for i in both_selected:
        if len(lista) >= i[0] - 2 + 8 + 1:
            sel_val = 8
        else:
            sel_val = len(lista) - 1 - (i[0] - 2 + 8)
        for j in range(sel_val):
            out_text = out_text + "\n" + str(-2 + j) + ", " + str(lista[i[0] - 2 + j])
        out_text = out_text + "\n---------------------------"
    out_text = out_text + "\n---------------------------"
    # Disabled TAM-based selection, commented out in the original via a
    # triple-quoted string. The recovered chunk is truncated mid-string
    # ("if count"); the string is closed here so the module can be imported.
    """print("##Selected based on presence of no. of TAM##")
    print("-------------------------------")
    if len(tam_list)==0:
        print("No TAM list provided hence no selection based on TAM")
    tam_output = detect_tam(entity_out, tam_list)
    no_of_tam = {}
    count = 0
    tam_numbers = []
    for i in tam_output:
        count = count + 1
        if len(tam_output[i]) not in no_of_tam:
            no_of_tam[len(tam_output[i])] = [i]
            tam_numbers.append(len(tam_output[i]))
        else:
            no_of_tam[len(tam_output[i])].append(i)
    tam_numbers.sort(reverse=True)
    if count ... (source truncated here)"""
    # NOTE(review): chunk ends mid-function in the recovered source;
    # returning the accumulated report is the evident intent — confirm.
    return out_text