parser / udpipe2 /pdtc-1.0 /compose_deprel.py
anasampa2's picture
Upload 151 files
ee0ec3d verified
raw
history blame
No virus
3.01 kB
#!/usr/bin/env python3
import argparse
import sys
# Zero-based column indices in which the input stores the two flags; a flag
# counts as set when the column holds the string "1".
IS_MEMBER, IS_PARENTHESIS_ROOT = 5, 3

# Deprels that end the upward search for a member's governing node, mapped to
# the suffix appended to the member's own deprel.
_MEMBER_SUFFIXES = {"Apos": "_Ap", "Coord": "_Co"}


def _governing_deprel(node, heads, deps):
    """Return the deprel of the first Apos/Coord node found walking up from *node*.

    Args:
        node: 1-based id of the token where the search starts.
        heads: mapping from token id to its head column (a string holding an int).
        deps: mapping from token id to its original deprel column.

    Returns:
        "Apos" or "Coord", or None when the walk reaches head 0 without
        meeting such a node.
    """
    # NOTE(review): the search starts at the member token itself, so a member
    # whose own deprel is Apos/Coord matches itself -- confirm this is intended.
    while node and deps[node] not in _MEMBER_SUFFIXES:
        node = int(heads[node])
    # Token ids start at 1, so deps has no entry for 0; looking it up there
    # used to raise KeyError instead of letting the caller report the problem.
    return deps[node] if node else None


def _compose_block(block, is_member=False, is_parenthesis_root=False):
    """Fold the requested flags of one sentence into its deprel column.

    Args:
        block: the lines of one sentence, without trailing newlines. Lines that
            do not split into exactly 10 tab-separated columns are passed
            through unchanged.
        is_member: append "_Ap"/"_Co" to the deprel of flagged members and
            reset the IS_MEMBER column to "_".
        is_parenthesis_root: append "_Pa" to the deprel of flagged tokens and
            reset the IS_PARENTHESIS_ROOT column to "_".

    Returns:
        A new list with the rewritten lines; *block* itself is not modified.
    """
    # First pass: collect the original head and deprel of every token so the
    # upward walk is not affected by suffixes added in the second pass.
    heads, deps = {}, {}
    for entry in block:
        columns = entry.split("\t")
        if len(columns) == 10:
            token_id = len(heads) + 1
            assert int(columns[0]) == token_id, "token ids must be consecutive from 1"
            deps[token_id] = columns[7]
            heads[token_id] = columns[6]

    # Second pass: rewrite the token lines.
    rows = list(block)
    for i, entry in enumerate(rows):
        columns = entry.split("\t")
        if len(columns) != 10:
            continue

        if is_member and columns[IS_MEMBER] == "1":
            suffix = _MEMBER_SUFFIXES.get(_governing_deprel(int(columns[0]), heads, deps))
            if suffix is not None:
                columns[7] += suffix
            else:
                print("Did not find correct parent of IsMember {} in the below sentence".format(entry), *rows, file=sys.stderr, sep="\n")
            columns[IS_MEMBER] = "_"

        if is_parenthesis_root and columns[IS_PARENTHESIS_ROOT] == "1":
            columns[7] += "_Pa"
            columns[IS_PARENTHESIS_ROOT] = "_"

        rows[i] = "\t".join(columns)
    return rows


def main(argv=None):
    """Rewrite the given files sentence by sentence and print them to stdout.

    Args:
        argv: command-line arguments; None makes argparse read sys.argv[1:].

    Raises:
        AssertionError: on an empty sentence (two consecutive blank lines or a
            leading blank line), on non-consecutive token ids, or when a file
            does not end with a blank line after its last sentence.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", default=[], nargs="*", help="Input paths")
    parser.add_argument("--is_member", default=False, action="store_true", help="Add is_member")
    parser.add_argument("--is_parenthesis_root", default=False, action="store_true", help="Add is_parenthesis_root")
    args = parser.parse_args(argv)

    for path in args.paths:
        with open(path, "r", encoding="utf-8") as conllu_file:
            block = []
            for line in conllu_file:
                line = line.rstrip("\n")
                if line:
                    block.append(line)
                    continue

                # A blank line terminates the current sentence.
                assert block, "empty sentence"
                composed = _compose_block(block, args.is_member, args.is_parenthesis_root)
                print(*composed, sep="\n", end="\n\n")
                block = []
            # Every sentence, including the last one, must end with a blank line.
            assert not block, "missing blank line after the last sentence"


if __name__ == "__main__":
    main()