Spaces:
Sleeping
Sleeping
Update grewtse/preprocessing/conllu_parser.py
Browse files
grewtse/preprocessing/conllu_parser.py
CHANGED
|
@@ -39,7 +39,6 @@ class ConlluParser:
|
|
| 39 |
for tokenlist in parse_incr(f):
|
| 40 |
# get the sentence ID in the dataset
|
| 41 |
sent_id = tokenlist.metadata["sent_id"]
|
| 42 |
-
logging.info(f"Parsing Sentence: {sent_id}")
|
| 43 |
|
| 44 |
# iterate over each token
|
| 45 |
for token in tokenlist:
|
|
@@ -102,7 +101,6 @@ class ConlluParser:
|
|
| 102 |
lemma = self.get_lemma(sentence_id, token_id)
|
| 103 |
lemma_mask = lexical_items["lemma"] == lemma
|
| 104 |
lexical_items = lexical_items[lemma_mask]
|
| 105 |
-
logging.info(f"Looking for form {lemma}")
|
| 106 |
|
| 107 |
lexical_items = construct_candidate_set(lexical_items, token_features)
|
| 108 |
# ensure that it doesn't allow minimal pairs with different start cases e.g business, Business
|
|
@@ -301,7 +299,6 @@ def construct_candidate_set(
|
|
| 301 |
for feat, value in target_features.items():
|
| 302 |
# ensure feature is a valid feature in feature set
|
| 303 |
if feat not in subset.columns:
|
| 304 |
-
print(subset.columns)
|
| 305 |
raise KeyError("Invalid feature provided to confound set: {}".format(feat))
|
| 306 |
|
| 307 |
# slim the mask down using each feature
|
|
|
|
| 39 |
for tokenlist in parse_incr(f):
|
| 40 |
# get the sentence ID in the dataset
|
| 41 |
sent_id = tokenlist.metadata["sent_id"]
|
|
|
|
| 42 |
|
| 43 |
# iterate over each token
|
| 44 |
for token in tokenlist:
|
|
|
|
| 101 |
lemma = self.get_lemma(sentence_id, token_id)
|
| 102 |
lemma_mask = lexical_items["lemma"] == lemma
|
| 103 |
lexical_items = lexical_items[lemma_mask]
|
|
|
|
| 104 |
|
| 105 |
lexical_items = construct_candidate_set(lexical_items, token_features)
|
| 106 |
# ensure that it doesn't allow minimal pairs with different start cases e.g business, Business
|
|
|
|
| 299 |
for feat, value in target_features.items():
|
| 300 |
# ensure feature is a valid feature in feature set
|
| 301 |
if feat not in subset.columns:
|
|
|
|
| 302 |
raise KeyError("Invalid feature provided to confound set: {}".format(feat))
|
| 303 |
|
| 304 |
# slim the mask down using each feature
|