Fix "TypeError: 'Document' object is not iterable". The error was raised by the loop at line 76, which is now at lines 80-81: besides fixing the error, this commit also adds code before that loop to preprocess the text better.
Browse files
model.py
CHANGED
@@ -61,10 +61,14 @@ def model(passage, level):
|
|
61 |
# Read from the input file
|
62 |
# with open(text_input, "r") as file:
|
63 |
# txt = str(file.readlines()).replace("[", "").replace("'", "").replace("]", "")
|
64 |
-
|
|
|
|
|
|
|
65 |
|
66 |
-
|
67 |
-
|
|
|
68 |
else:
|
69 |
txt = txt
|
70 |
|
@@ -73,10 +77,11 @@ def model(passage, level):
|
|
73 |
n = n.strip()
|
74 |
ex1 = nlp(n)
|
75 |
|
76 |
-
for
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
80 |
|
81 |
# Collect the tagging results (filter in just NOUN, PROPN, VERB, ADJ, or ADV only)
|
82 |
collector = {}
|
|
|
61 |
# Read from the input file
|
62 |
# with open(text_input, "r") as file:
|
63 |
# txt = str(file.readlines()).replace("[", "").replace("'", "").replace("]", "")
|
64 |
+
if not passage.endswith((".", "!", "?")):
|
65 |
+
txt = passage + "."
|
66 |
+
else:
|
67 |
+
txt = passage
|
68 |
|
69 |
+
sentence_cutters = [".", "!", "?"]
|
70 |
+
if sentence_cutters in txt:
|
71 |
+
txt = (txt.split(".").split("!").split("?"))
|
72 |
else:
|
73 |
txt = txt
|
74 |
|
|
|
77 |
n = n.strip()
|
78 |
ex1 = nlp(n)
|
79 |
|
80 |
+
for sentence in ex1.sentences:
|
81 |
+
for word in sentence.words:
|
82 |
+
sentence_question_tag = n.replace(word.text, f"[{word.text}]") # spacy and stanza use the same entity tag: "word.text"
|
83 |
+
# text_dict[f"{word.lemma_} = {sentence_question_tag}"] = word.pos_ # this is for spacy
|
84 |
+
text_dict[f"{word.lemma} = {sentence_question_tag}"] = word.upos # this is for stanza
|
85 |
|
86 |
# Collect the tagging results (filter in just NOUN, PROPN, VERB, ADJ, or ADV only)
|
87 |
collector = {}
|