nontGcob committed on
Commit
01d266e
·
1 Parent(s): afd179e

fix TypeError: 'Document' object is not iterable, raised at line 76 (now lines 80-81, since we not only fix that error but also add more code before it to preprocess the text better)
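For reference, the iteration pattern the fix switches to: a stanza Document is not iterable itself, so its tokens have to be reached through .sentences and each sentence's .words. A minimal sketch, assuming a standard stanza pipeline (the pipeline setup below is illustrative; model.py builds its own nlp object elsewhere):

import stanza

# Illustrative pipeline; model.py constructs its own `nlp` with its own settings.
nlp = stanza.Pipeline("en", processors="tokenize,pos,lemma")
doc = nlp("The cat sat on the mat.")

# `for word in doc:` would raise the TypeError this commit fixes,
# because Document defines no __iter__; the words sit under each sentence.
for sentence in doc.sentences:
    for word in sentence.words:
        print(word.text, word.lemma, word.upos)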

Files changed (1)
  1. model.py +12 -7
model.py CHANGED
@@ -61,10 +61,14 @@ def model(passage, level):
     # Read from the input file
     # with open(text_input, "r") as file:
     #     txt = str(file.readlines()).replace("[", "").replace("'", "").replace("]", "")
-    txt = passage + "."
+    if not passage.endswith((".", "!", "?")):
+        txt = passage + "."
+    else:
+        txt = passage
 
-    if "." in txt:
-        txt = (txt.split("."))
+    sentence_cutters = [".", "!", "?"]
+    if sentence_cutters in txt:
+        txt = (txt.split(".").split("!").split("?"))
     else:
         txt = txt
 
@@ -73,10 +77,11 @@ def model(passage, level):
         n = n.strip()
         ex1 = nlp(n)
 
-        for word in ex1:
-            sentence_question_tag = n.replace(word.text, f"[{word.text}]")  # spacy and stanza use the same entity tag: "word.text"
-            # text_dict[f"{word.lemma_} = {sentence_question_tag}"] = word.pos_  # this is for spacy
-            text_dict[f"{word.lemma} = {sentence_question_tag}"] = word.upos  # this is for stanza
+        for sentence in ex1.sentences:
+            for word in sentence.words:
+                sentence_question_tag = n.replace(word.text, f"[{word.text}]")  # spacy and stanza use the same entity tag: "word.text"
+                # text_dict[f"{word.lemma_} = {sentence_question_tag}"] = word.pos_  # this is for spacy
+                text_dict[f"{word.lemma} = {sentence_question_tag}"] = word.upos  # this is for stanza
 
     # Collect the tagging results (filter in just NOUN, PROPN, VERB, ADJ, or ADV only)
     collector = {}
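On the preprocessing half of the change ("add more code before it to preprocess the text better"), the new sentence_cutters block is meant to split the passage on ".", "!" and "?". Since str.split returns a list, the chained .split(".").split("!").split("?") calls would fail after the first one; a single regex split is one common way to cut on all three terminators at once. A minimal sketch, where re and split_sentences are illustrative assumptions and not part of model.py:

import re

def split_sentences(passage):
    # Split on any of ".", "!" or "?" in one pass and drop empty pieces.
    return [part.strip() for part in re.split(r"[.!?]", passage) if part.strip()]

print(split_sentences("Hello there! How are you? Fine."))
# ['Hello there', 'How are you', 'Fine']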