nontGcob committed on
Commit
01d266e
·
1 Parent(s): afd179e

fix TypeError: 'Document' object is not iterable, raised at line 76 (now lines 80-81, since we not only fix that error but also add more code before it to preprocess the text better)
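For reference, the iteration pattern the fix switches to: a stanza Document is not iterable itself, so its tokens have to be reached through .sentences and each sentence's .words. A minimal sketch, assuming a standard stanza pipeline (the pipeline setup below is illustrative; model.py builds its own nlp object elsewhere):

import stanza

# Illustrative pipeline; model.py constructs its own `nlp` with its own settings.
nlp = stanza.Pipeline("en", processors="tokenize,pos,lemma")
doc = nlp("The cat sat on the mat.")

# `for word in doc:` would raise the TypeError this commit fixes,
# because Document defines no __iter__; the words sit under each sentence.
for sentence in doc.sentences:
    for word in sentence.words:
        print(word.text, word.lemma, word.upos)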

Files changed (1)
  1. model.py +12 -7
model.py CHANGED
@@ -61,10 +61,14 @@ def model(passage, level):
     # Read from the input file
     # with open(text_input, "r") as file:
     #     txt = str(file.readlines()).replace("[", "").replace("'", "").replace("]", "")
-    txt = passage + "."
+    if not passage.endswith((".", "!", "?")):
+        txt = passage + "."
+    else:
+        txt = passage
 
-    if "." in txt:
-        txt = (txt.split("."))
+    sentence_cutters = [".", "!", "?"]
+    if sentence_cutters in txt:
+        txt = (txt.split(".").split("!").split("?"))
     else:
         txt = txt
 
@@ -73,10 +77,11 @@ def model(passage, level):
         n = n.strip()
         ex1 = nlp(n)
 
-        for word in ex1:
-            sentence_question_tag = n.replace(word.text, f"[{word.text}]")  # spacy and stanza use the same entity tag: "word.text"
-            # text_dict[f"{word.lemma_} = {sentence_question_tag}"] = word.pos_  # this is for spacy
-            text_dict[f"{word.lemma} = {sentence_question_tag}"] = word.upos  # this is for stanza
+        for sentence in ex1.sentences:
+            for word in sentence.words:
+                sentence_question_tag = n.replace(word.text, f"[{word.text}]")  # spacy and stanza use the same entity tag: "word.text"
+                # text_dict[f"{word.lemma_} = {sentence_question_tag}"] = word.pos_  # this is for spacy
+                text_dict[f"{word.lemma} = {sentence_question_tag}"] = word.upos  # this is for stanza
 
     # Collect the tagging results (filter in just NOUN, PROPN, VERB, ADJ, or ADV only)
     collector = {}
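On the preprocessing half of the change ("add more code before it to preprocess the text better"), the new sentence_cutters block is meant to split the passage on ".", "!" and "?". Since str.split returns a list, the chained .split(".").split("!").split("?") calls would fail after the first one; a single regex split is one common way to cut on all three terminators at once. A minimal sketch, where re and split_sentences are illustrative assumptions and not part of model.py:

import re

def split_sentences(passage):
    # Split on any of ".", "!" or "?" in one pass and drop empty pieces.
    return [part.strip() for part in re.split(r"[.!?]", passage) if part.strip()]

print(split_sentences("Hello there! How are you? Fine."))
# ['Hello there', 'How are you', 'Fine']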