File size: 1,577 Bytes
1f1d7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys, os
import uuid
import shutil
import subprocess


def normalize_formula(formula):
    unique_filename = str(uuid.uuid4()) + '.tex'
    with open(unique_filename, 'w') as fout:
        fout.write(formula)

    input_file = unique_filename
    output_file = unique_filename + '.out'
    assert os.path.exists(input_file), input_file
    cmd = "perl -pe 's|hskip(.*?)(cm\\|in\\|pt\\|mm\\|em)|hspace{\\1\\2}|g' %s > %s"%(input_file, output_file)
    ret = subprocess.call(cmd, shell=True)
    if ret != 0:
        assert False

    temp_file = output_file + '.tmp'
    with open(temp_file, 'w') as fout:  
        with open(output_file) as fin:
            for line in fin:
                fout.write(line.replace('\r', ' ').strip() + '\n')  # delete \r

    cmd = "cat %s | node preprocess_latex.js %s > %s "%(temp_file, 'normalize', output_file)
    ret = subprocess.call(cmd, shell=True)
    os.remove(temp_file)
    if ret != 0:
        assert False
    temp_file = output_file + '.tmp'
    shutil.move(output_file, temp_file)
    with open(temp_file) as fin:
        with open(output_file, 'w') as fout:
            for line in fin:
                tokens = line.strip().split()
                tokens_out = []
                for token in tokens:
                    if True or is_ascii(token):
                        tokens_out.append(token)
                fout.write(' '.join(tokens_out)+'\n')
    formula_normalized = open(output_file).read().strip()
    os.remove(temp_file)
    os.remove(input_file)
    os.remove(output_file)
    return formula_normalized