Spaces:

RobPruzan
/

automaticlitassesment

Runtime error

App Files Files Community

RobPruzan committed on Aug 8, 2022

Commit

8a9f69a

•

1 Parent(s): b327954

Updating diversity scoring function

Browse files

Files changed (1) hide show

app.py +55 -48

app.py CHANGED Viewed

@@ -42,58 +42,65 @@ for idx, key in enumerate(glove_vectors.key_to_index.keys()):
 def calculate_diversity(text):
-    stop_words = set(stopwords.words('english'))
-    for i in string.punctuation:
-        stop_words.add(i)
-    tokenized_text = word_tokenize(text)
-    tokenized_text = list(map(lambda word: word.lower(), tokenized_text))
-    sim_words = {}
-    if len(tokenized_text) <= 1:
-        return 1, "More Text Required"
-    for idx, anc_word in enumerate(tokenized_text):
-        if anc_word in stop_words:
-            continue
-        if idx in sim_words:
-            sim_words[idx] = sim_words[idx]
-            continue
-        vocab = [anc_word]
-        for pos, comp_word in enumerate(tokenized_text):
-            try:
-                if not comp_word in stop_words and cosine_similarity(w2v[anc_word].reshape(1, -1),
-                                                                     w2v[comp_word].reshape(1, -1)) > .75:
-                    vocab.append(comp_word)
-                sim_words[idx] = vocab
-            except KeyError:
-                continue
-    scores = {}
-    for k, value in sim_words.items():
-        if len(value) == 1:
-            scores[k] = 1
-            continue
-        t_sim = len(value) - 1
-        t_rep = (len(value) - 1) - (len(set(value)))
-        score = ((t_sim - t_rep) / t_sim) ** 2
-        scores[key] = score
-    mean_score = 0
-    total = 0
-    for value in scores.values():
-        mean_score += value
-        total += 1
-    return scores, mean_score / total
 def dict_to_list(dictionary, max_size=10):

 def calculate_diversity(text):
+  """Score the lexical diversity of ``text``.
+
+  Tokens are lower-cased; English stop words and punctuation are never used
+  as anchors. For every remaining token a group is collected from the tokens
+  whose embedding cosine similarity with it exceeds 0.75, and the group is
+  scored by the squared share of distinct words among its similar tokens.
+
+  Args:
+    text: the raw text to analyse.
+
+  Returns:
+    ``(scores, mean)`` where ``scores`` maps a token position to its group
+    score (1 for a group of one, -1 for a group of two) and ``mean`` is the
+    average of all scores other than -1.
+    ``(1, "More Text Required")`` when the text has at most one token or
+    when no position produced a score that counts towards the mean.
+  """
+  stop_words = set(stopwords.words('english'))
+  stop_words.update(string.punctuation)
+  tokenized_text = [word.lower() for word in word_tokenize(text)]
+  if len(tokenized_text) <= 1:
+    return 1, "More Text Required"
+  sim_words = {}
+  for idx, anc_word in enumerate(tokenized_text):
+    if anc_word in stop_words:
+      continue
+    vocab = [anc_word]
+    for pos, comp_word in enumerate(tokenized_text):
+      # A position whose stored group is headed by this same word is not
+      # compared again: the anchor points at that group for now, and the
+      # assignment further down replaces it as soon as a later comparison
+      # completes without a KeyError.
+      if anc_word in sim_words.get(pos, []):
+        if anc_word == sim_words[pos][0]:
+          sim_words[idx] = sim_words[pos]
+          continue
+      try:
+        if comp_word not in stop_words and cosine_similarity(
+            w2v[anc_word].reshape(1, -1), w2v[comp_word].reshape(1, -1)) > .75:
+          vocab.append(comp_word)
+        sim_words[idx] = vocab
+      except KeyError:
+        # Presumably one of the two words has no vector in w2v; skip the pair.
+        continue
+  scores = {}
+  for key, value in sim_words.items():
+    if len(value) == 1:
+      scores[key] = 1
+      continue
+    if len(value) == 2:
+      # Marker value: kept in the result but left out of the mean below.
+      scores[key] = -1
+      continue
+    t_sim = len(value) - 1
+    t_rep = (len(value) - 1) - (len(set(value[1:])))
+    # t_sim - t_rep is the number of distinct words after the anchor.
+    score = ((t_sim - t_rep)/t_sim)**2
+    scores[key] = score
+  counted = [value for value in scores.values() if value != -1]
+  if not counted:
+    # Nothing to average (e.g. the text holds only stop words): reuse the
+    # short-input result instead of dividing by zero.
+    return 1, "More Text Required"
+  return scores, sum(counted)/len(counted)
 def dict_to_list(dictionary, max_size=10):