Spaces:
Running
Running
import argparse | |
import re | |
from helpers import write_lines | |
def filter_line(line):
    """Clean the ``-LRB-`` / ``-RRB-`` placeholder tokens out of one line.

    Three cases are handled:

    * Both tokens are present: every span that starts at a ``-LRB-`` token
      and runs to the nearest following ``-RRB-`` token is deleted,
      including the tokens themselves. Whitespace around the span is left
      untouched, so a double space may remain.
    * Exactly one of the two tokens is present: each occurrence of that
      token is replaced by a double-quote character.
    * Neither token is present: the line is returned unchanged.

    Args:
        line: A single line of text (no assumptions about a trailing
            newline are made here).

    Returns:
        The cleaned line.
    """
    has_left = "-LRB-" in line
    has_right = "-RRB-" in line

    if has_left and has_right:
        # Anchor on the literal tokens. A looser pattern that begins at
        # "any hyphen before LRB" also swallows unrelated text such as the
        # tail of a hyphenated word ("well-known -LRB- ... -RRB-").
        return re.sub(r'-LRB-.*?-RRB-', '', line)

    if has_left or has_right:
        # Only one kind of token is present, so it has no partner to pair
        # with; it is turned into a quotation mark instead.
        return line.replace("-LRB-", '"').replace("-RRB-", '"')

    return line
def main(args):
    """Filter every line of the source file and hand the result to ``write_lines``.

    Args:
        args: Parsed command-line namespace; ``args.source`` is the path
            that is read and ``args.output`` is passed to ``write_lines``
            as its first argument.
    """
    cleaned_rows = []
    with open(args.source) as source_file:
        for raw_row in source_file:
            # Trailing whitespace (including the newline) is stripped
            # before the row is filtered.
            cleaned_rows.append(filter_line(raw_row.rstrip()))

    # The input is fully consumed and closed before anything is written.
    write_lines(args.output, cleaned_rows)
if __name__ == '__main__':
    # Command-line entry point: both paths are mandatory options.
    cli = argparse.ArgumentParser()
    required_options = (
        ('-s', '--source', 'Path to the source file'),
        ('-o', '--output', 'Path to the output file'),
    )
    for short_flag, long_flag, description in required_options:
        cli.add_argument(short_flag, long_flag,
                         help=description,
                         required=True)
    main(cli.parse_args())