# NOTE: the original scrape of this file carried a Hugging Face Spaces page
# header ("Spaces: / Sleeping / Sleeping"); commented out so the module parses.
import nltk
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer, util
import numpy
# from nltk.stem import WordNetLemmatizer
import pickle
import re
nltk.download('punkt')
class AmbguityDetector:
    """Flag potentially ambiguous words in a requirement sentence.

    Each non-stopword token of the input sentence is embedded with a
    sentence-transformer model and compared (cosine similarity) against five
    keyword lists of known ambiguity triggers: lexical, scope, referential,
    vague, and coordination.  A token whose similarity to any keyword reaches
    the threshold is flagged with that category.

    NOTE(review): this class relies on module-level names defined elsewhere
    in the project and not visible in this chunk: ``stopwords_custom``,
    ``punctuation``, and the keyword lists / pre-encoded tensors
    ``lexical_AMB``/``lexical_encoded``, ``scope_AMB``/``scope_encoded``,
    ``referential_AMB``/``referential_encoded``, ``vague_AMB``/``vague_encoded``,
    ``coordination_AMB``/``coordination_encoded`` — confirm they are loaded
    (presumably via pickle) before instantiating.
    """

    # Minimum cosine similarity for a token/keyword pair to count as ambiguous.
    SIMILARITY_THRESHOLD = 0.6

    def __init__(self):
        # Small, fast general-purpose sentence-embedding model.
        self.model = SentenceTransformer(
            'sentence-transformers/all-MiniLM-L6-v2')

    def _score_category(self, token, token_emb, keywords, encoded, label,
                        scores, ambiguous_words, words_set):
        """Score one token embedding against one keyword category.

        For every keyword whose cosine similarity with *token_emb* meets
        SIMILARITY_THRESHOLD, appends the token to *ambiguous_words*, records
        a ``(token, label)`` pair in *words_set*, and stores the score in
        *scores* under the key ``"<token>+<keyword>"``.  Mutates the three
        output collections in place.
        """
        for keyword in keywords:
            cos_sim = util.pytorch_cos_sim(
                token_emb, encoded[keyword]).numpy().reshape([1, ])
            if cos_sim[0] >= self.SIMILARITY_THRESHOLD:
                ambiguous_words.append(token)
                words_set.append((token, label))
                scores[token + "+" + keyword] = cos_sim[0]

    def sentence_ambiguity(self, sentence):
        """Return a list of ``(token, category)`` pairs flagged as ambiguous.

        Parameters
        ----------
        sentence : str
            The requirement sentence to analyse.

        Returns
        -------
        list[tuple[str, str]]
            One entry per (token, matching keyword) hit, in token order,
            with the category label ("lexical", "scope", "referential",
            "vague", or "coordination") as the second element.
        """
        # Tokenize, drop custom stopwords (case-sensitive check, matching the
        # original ordering), then lowercase and drop punctuation tokens.
        # BUGFIX: the original lowered tokens via filtered_tokens.index(i)
        # (wrong element on duplicates) and called list.remove() while
        # iterating the same list, which silently skipped the token after
        # each removed punctuation mark.  Building new lists avoids both.
        tokens = word_tokenize(sentence)
        filtered_tokens = [t for t in tokens if t not in stopwords_custom]
        filtered_tokens = [t.lower() for t in filtered_tokens
                           if t not in punctuation]

        lexical = dict()
        scope = dict()
        referential = dict()
        vague = dict()
        coordination = dict()
        ambiguous_words = list()
        words_set = list()

        # Category table replaces five copy-pasted scoring loops; order
        # matches the original so words_set ordering is unchanged.
        categories = [
            (lexical_AMB, lexical_encoded, "lexical", lexical),
            (scope_AMB, scope_encoded, "scope", scope),
            (referential_AMB, referential_encoded, "referential", referential),
            (vague_AMB, vague_encoded, "vague", vague),
            (coordination_AMB, coordination_encoded, "coordination",
             coordination),
        ]
        for token in filtered_tokens:
            token_emb = self.model.encode(token, convert_to_tensor=True)
            for keywords, encoded, label, scores in categories:
                self._score_category(token, token_emb, keywords, encoded,
                                     label, scores, ambiguous_words,
                                     words_set)

        # De-duplicate while preserving first-seen order.
        ambiguous_words = list(dict.fromkeys(ambiguous_words))

        ambiguity = {
            "lexical": lexical,
            "referential": referential,
            "scope": scope,
            "vague": vague,
            "coordination": coordination,
            "words": ambiguous_words,
            "lexical_st": words_set,
        }
        # Only the (token, category) pairs are exposed; the per-pair score
        # breakdown above is assembled but not returned (kept for parity
        # with the original implementation).
        return ambiguity["lexical_st"]