import nltk
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer, util
import numpy
# from nltk.stem import WordNetLemmatizer
import pickle
import re

nltk.download('punkt')

# The keyword lists (lexical_AMB, scope_AMB, referential_AMB, vague_AMB,
# coordination_AMB), their precomputed embeddings (lexical_encoded, ...,
# coordination_encoded), stopwords_custom, and punctuation are module-level
# globals expected to be defined/loaded elsewhere in this module.


class AmbguityDetector:
    def __init__(self):
        self.model = SentenceTransformer(
            'sentence-transformers/all-MiniLM-L6-v2')

    def sentence_ambiguity(self, sentence):
        model = self.model

        # Tokenize, drop stop words and punctuation, lower-case what remains.
        tokens = word_tokenize(sentence)
        filtered_tokens = [
            token.lower()
            for token in tokens
            if token not in stopwords_custom and token not in punctuation
        ]

        # One (keyword list, precomputed embeddings) pair per ambiguity type.
        categories = [
            ("lexical", lexical_AMB, lexical_encoded),
            ("scope", scope_AMB, scope_encoded),
            ("referential", referential_AMB, referential_encoded),
            ("vague", vague_AMB, vague_encoded),
            ("coordination", coordination_AMB, coordination_encoded),
        ]

        scores = {name: dict() for name, _, _ in categories}
        ambiguous_words = list()
        words_set = list()

        for token in filtered_tokens:
            token_emb = model.encode(token, convert_to_tensor=True)
            for name, keywords, encoded in categories:
                for keyword in keywords:
                    # Cosine similarity between the token and the keyword embedding.
                    cos_sim = float(util.pytorch_cos_sim(token_emb, encoded[keyword]))
                    if cos_sim >= 0.6:
                        ambiguous_words.append(token)
                        words_set.append((token, name))
                        scores[name][token + "+" + keyword] = cos_sim

        ambiguity = dict(scores)
        # De-duplicate while preserving first-seen order.
        ambiguity["words"] = list(dict.fromkeys(ambiguous_words))
        ambiguity["lexical_st"] = words_set
        # print(filtered_tokens)
        # print(ambiguity)
        return ambiguity["lexical_st"]
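

# Minimal usage sketch (illustrative only, not part of the original module):
# assumes the *_AMB keyword lists and *_encoded embedding dicts referenced
# above have been populated, e.g. by unpickling precomputed
# SentenceTransformer embeddings. The example sentence is hypothetical.
if __name__ == "__main__":
    detector = AmbguityDetector()
    requirement = "The system should respond quickly to all user requests."
    flagged = detector.sentence_ambiguity(requirement)
    # Each entry pairs a token with the ambiguity category it matched,
    # e.g. [("quickly", "vague"), ...], depending on the keyword lists used.
    print(flagged)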