File size: 1,015 Bytes
847e3e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import argparse
import re

from helpers import write_lines


def filter_line(line):
    """Strip bracketed spans from *line*, or neutralise a lone bracket token.

    The tokens ``-LRB-`` and ``-RRB-`` are presumably the Penn-Treebank style
    escapes for "(" and ")" -- confirm against the data source.

    * If both tokens occur, every ``-LRB- ... -RRB-`` span (tokens included)
      is deleted. Surrounding whitespace is left as-is, so a removed span
      between two spaces leaves both spaces behind.
    * If only one kind of token occurs, each occurrence is replaced with a
      double quote character.
    * Otherwise the line is returned unchanged.

    Args:
        line: A single line of text.

    Returns:
        The cleaned line.
    """
    if "-LRB-" in line and "-RRB-" in line:
        # Match the literal tokens. The previous pattern started at *any*
        # hyphen, so a hyphenated word (or a stray -RRB-) ahead of the
        # bracket pair was swallowed together with the bracketed span.
        # Non-greedy so that several spans on one line are removed
        # independently. If -RRB- only precedes -LRB-, nothing matches and
        # the line is returned unchanged.
        return re.sub(r'-LRB-.*?-RRB-', '', line)
    # At most one kind of token is present here; the replacements are a no-op
    # when neither token occurs.
    return line.replace("-LRB-", '"').replace("-RRB-", '"')


def main(args):
    """Clean every line of ``args.source`` and pass the result to ``write_lines``.

    Each line is read from ``args.source``, stripped of trailing whitespace
    (including the newline), run through ``filter_line``, and the resulting
    list is handed to ``write_lines`` together with ``args.output``.

    Args:
        args: Object exposing ``source`` and ``output`` attributes (the
            parsed command-line namespace when run as a script).
    """
    with open(args.source) as source_file:
        stripped_rows = [raw.rstrip() for raw in source_file]

    cleaned_rows = list(map(filter_line, stripped_rows))
    write_lines(args.output, cleaned_rows)


if __name__ == '__main__':
    # Command-line entry point: both paths are mandatory options.
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, description in (
            ('-s', '--source', 'Path to the source file'),
            ('-o', '--output', 'Path to the output file'),
    ):
        parser.add_argument(short_flag, long_flag,
                            help=description,
                            required=True)
    args = parser.parse_args()
    main(args)