Spaces:
Runtime error
Runtime error
import argparse | |
# Apply the edits of a single annotator to generate the corrected sentences. | |
def main(args): | |
m2 = open(args.m2_file).read().strip().split("\n\n") | |
out = open(args.out, "w") | |
# Do not apply edits with these error types | |
skip = {"noop", "UNK", "Um"} | |
for sent in m2: | |
sent = sent.split("\n") | |
cor_sent = sent[0].split()[1:] # Ignore "S " | |
edits = sent[1:] | |
offset = 0 | |
for edit in edits: | |
edit = edit.split("|||") | |
if edit[1] in skip: continue # Ignore certain edits | |
coder = int(edit[-1]) | |
if coder != args.id: continue # Ignore other coders | |
span = edit[0].split()[1:] # Ignore "A " | |
start = int(span[0]) | |
end = int(span[1]) | |
cor = edit[2].split() | |
cor_sent[start+offset:end+offset] = cor | |
offset = offset-(end-start)+len(cor) | |
out.write(" ".join(cor_sent)+"\n") | |
if __name__ == "__main__": | |
# Define and parse program input | |
parser = argparse.ArgumentParser() | |
parser.add_argument("m2_file", help="The path to an input m2 file.") | |
parser.add_argument("-out", help="A path to where we save the output corrected text file.", required=True) | |
parser.add_argument("-id", help="The id of the target annotator in the m2 file.", type=int, default=0) | |
args = parser.parse_args() | |
main(args) | |