# ambiguity-detection/.ipynb_checkpoints/detector-checkpoint.py
import nltk
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer, util

nltk.download('punkt')
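
# NOTE (assumption): this checkpoint references several module-level lookups
# that are not defined in the file itself and are presumably provided
# elsewhere in the repository:
#   stopwords_custom -- collection of stopword strings to skip
#   punctuation      -- collection of punctuation strings to skip
#   lexical_AMB, scope_AMB, referential_AMB, vague_AMB, coordination_AMB
#                    -- keyword lists, one per ambiguity category
#   lexical_encoded, scope_encoded, referential_encoded, vague_encoded,
#   coordination_encoded
#                    -- dicts mapping each keyword to its precomputed
#                       SentenceTransformer embedding tensor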


class AmbguityDetector:
    """Flags potentially ambiguous words in a sentence by comparing token
    embeddings against precomputed embeddings of known ambiguity keywords."""

    def __init__(self):
        self.model = SentenceTransformer(
            'sentence-transformers/all-MiniLM-L6-v2')
    def sentence_ambiguity(self, sentence):
        """Return a list of (token, category) pairs for tokens whose
        embedding is close to any known ambiguity keyword."""
        model = self.model
        tokens = word_tokenize(sentence)
        # Drop stopwords and punctuation, and lowercase what remains.
        filtered_tokens = [
            token.lower()
            for token in tokens
            if token not in stopwords_custom and token not in punctuation
        ]
        # Per-category similarity scores, keyed "token+keyword".
        scores = {
            "lexical": {},
            "scope": {},
            "referential": {},
            "vague": {},
            "coordination": {},
        }
        keyword_sets = {
            "lexical": (lexical_AMB, lexical_encoded),
            "scope": (scope_AMB, scope_encoded),
            "referential": (referential_AMB, referential_encoded),
            "vague": (vague_AMB, vague_encoded),
            "coordination": (coordination_AMB, coordination_encoded),
        }
        ambiguous_words = []
        words_set = []
        for token in filtered_tokens:
            token_emb = model.encode(token, convert_to_tensor=True)
            for category, (keywords, encoded) in keyword_sets.items():
                for keyword in keywords:
                    cos_sim = util.pytorch_cos_sim(
                        token_emb, encoded[keyword]).item()
                    # Flag the token if it is semantically close to a known
                    # ambiguity keyword of this category.
                    if cos_sim >= 0.6:
                        ambiguous_words.append(token)
                        words_set.append((token, category))
                        scores[category][token + "+" + keyword] = cos_sim
        # Drop duplicate tokens while preserving first-seen order.
        ambiguous_words = list(dict.fromkeys(ambiguous_words))
        ambiguity = dict(scores)
        ambiguity["words"] = ambiguous_words
        ambiguity["lexical_st"] = words_set
        # Only the (token, category) pairs are returned; the full per-keyword
        # scores remain available in `ambiguity` for inspection.
        return ambiguity["lexical_st"]
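

# --- Usage sketch -----------------------------------------------------------
# Minimal, illustrative demo. The vocabularies below are hypothetical
# placeholders standing in for the keyword lists and precomputed embeddings
# the repository is assumed to provide (see the NOTE above); they are not the
# ones shipped with this project.
if __name__ == "__main__":
    stopwords_custom = {"the", "a", "an", "is", "to", "should"}
    punctuation = set(".,;:!?()[]{}\"'")
    lexical_AMB = ["light", "file"]
    scope_AMB = ["all", "every"]
    referential_AMB = ["it", "this"]
    vague_AMB = ["some", "fast"]
    coordination_AMB = ["and", "or"]

    detector = AmbguityDetector()

    def _encode(words):
        # Precompute one embedding per keyword, as the detector expects.
        return {w: detector.model.encode(w, convert_to_tensor=True)
                for w in words}

    lexical_encoded = _encode(lexical_AMB)
    scope_encoded = _encode(scope_AMB)
    referential_encoded = _encode(referential_AMB)
    vague_encoded = _encode(vague_AMB)
    coordination_encoded = _encode(coordination_AMB)

    # Prints (token, category) pairs for tokens flagged as ambiguous.
    print(detector.sentence_ambiguity(
        "The system should process some files quickly."))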