Spaces:
Runtime error
Runtime error
File size: 5,950 Bytes
c308f77 02f3832 fb8db0f 02f3832 fb8db0f 02f3832 6e82d4a 41a34cd 02f3832 41a34cd 02f3832 41a34cd 6e82d4a 41a34cd 6e82d4a 41a34cd 6e82d4a 41a34cd 6e82d4a 41a34cd 02f3832 41a34cd 6e82d4a 41a34cd 02f3832 fb8db0f 8ab1767 41a34cd 02f3832 41a34cd 8ab1767 41a34cd 02f3832 6e82d4a fb8db0f 41a34cd fb8db0f 41a34cd fb8db0f 6e82d4a fb8db0f 41a34cd 02f3832 6e82d4a fb8db0f 41a34cd fb8db0f 02f3832 6e82d4a fb8db0f 6e82d4a fb8db0f 41a34cd 02f3832 fb8db0f 02f3832 6e82d4a fb8db0f 41a34cd 8ab1767 41a34cd fb8db0f 41a34cd fb8db0f 02f3832 fb8db0f 41a34cd fb8db0f 41a34cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
from constants import DATA_DIR, LATEX_PATH, PDFLATEX, GHOSTSCRIPT
import json
from multiprocessing import Pool
import os
import shutil
import string
import subprocess
import random
import tqdm
def generate_equation(latex, size, max_depth):
    """
    Generates a random latex equation
    -------
    params:
    :latex: -- dict with tokens to generate equation from; the keys read are
        "tokens" (sequence of strings), "pairs" (sequence of
        (opening, closing) items) and "scope_manipulators" (dict mapping a
        scope type to a sequence of operators). The scope types understood
        are "single", "double_no_delimiters" and "double_with_delimiters";
        any other type gets no opening brace and therefore yields an
        unbalanced equation
    :size: -- approximate size of equation
    :max_depth: -- max brackets and scope depth
    -------
    returns:
    the equation as a string ("" when size <= 0)
    """
    tokens, pairs, scopes = latex["tokens"], latex["pairs"], latex["scope_manipulators"]

    def _generate_equation_recursive(size_left=size, depth_used=0):
        if size_left <= 0:
            return ""

        # Plain tokens are always eligible; pairs and scopes are only
        # eligible while one more nesting level is still allowed.
        can_nest = int(max_depth > depth_used)
        group, = random.choices([tokens, pairs, scopes],
                                weights=[max_depth + 1, can_nest, can_nest])

        if group is tokens:
            return " ".join([
                random.choice(tokens),
                _generate_equation_recursive(size_left - 1, depth_used),
            ])

        # Split the remaining budget between the nested part and whatever
        # follows it at the current depth.
        post_scope_size = round(abs(random.gauss(0, size_left / 2)))
        size_left -= post_scope_size + 1

        if group is pairs:
            pair = random.choice(pairs)
            return " ".join([
                pair[0],
                _generate_equation_recursive(size_left, depth_used + 1),
                pair[1],
                _generate_equation_recursive(post_scope_size, depth_used),
            ])

        scope_type, scope_group = random.choice(list(scopes.items()))
        equation = random.choice(scope_group)

        if scope_type == "single":
            equation += "{ " + _generate_equation_recursive(size_left, depth_used + 1)
        elif scope_type == "double_no_delimiters":
            equation += "{ " + _generate_equation_recursive(size_left // 2, depth_used + 1) + " } { " + \
                        _generate_equation_recursive(size_left // 2, depth_used + 1)
        elif scope_type == "double_with_delimiters":
            equation += "^ { " + _generate_equation_recursive(size_left // 2, depth_used + 1) + " } _ { " + \
                        _generate_equation_recursive(size_left // 2, depth_used + 1)

        # Close the scope BEFORE appending the trailing part. The trailing
        # part is generated with the outer depth, so leaving it inside the
        # braces would let the real nesting exceed max_depth.
        equation += " } " + _generate_equation_recursive(post_scope_size, depth_used)
        return equation

    return _generate_equation_recursive()
def generate_image(directory, latex, filename, max_depth, equation_length, distribution_fraction):
    """
    Generates a random tex file and corresponding image
    -------
    params:
    :directory: -- dir where to save files
    :latex: -- dict with parameters to generate tex; it is not modified
    :filename: -- base name (without extension) for the generated files,
        joined onto directory
    :max_depth: -- max nested level of tex scopes
    :equation_length: -- max length of equation
    :distribution_fraction: -- fraction of whole available tex tokens to use
    -------
    returns:
    None. If pdflatex times out or exits with a non-zero code, the .tex file
    is removed again and no image is produced.
    """
    def fracture(sequence):
        # Keep the leading fraction of the sequence, but never fewer than
        # one element.
        return sequence[:max(1, int(len(sequence) * distribution_fraction))]

    # Work on a shallow copy so the caller's dict is not truncated in place
    # (repeated calls with the same dict would otherwise keep shrinking it).
    latex = dict(latex)
    for group in ["tokens", "pairs", "fonts", "font_sizes"]:
        latex[group] = fracture(latex[group])
    latex["scope_manipulators"] = {
        key: fracture(value) for key, value in latex["scope_manipulators"].items()
    }

    size = random.randint((equation_length + 1) // 2, equation_length)
    equation = generate_equation(latex, size=size, max_depth=max_depth)

    font, font_options = random.choice(latex["fonts"])
    font_option = random.choice([""] + font_options)
    font_size = random.choice(latex["font_sizes"])
    template = string.Template(latex["template"])
    tex = template.substitute(font=font, font_option=font_option, fontsize=font_size, equation=equation)

    filepath = os.path.join(directory, filename)
    tex_path = f"{filepath}.tex"
    with open(tex_path, mode="w") as file:
        file.write(tex)

    # Only the configured command strings are split on whitespace; paths are
    # passed as single arguments so directories containing spaces work.
    try:
        pdflatex_process = subprocess.run(
            [*f"{PDFLATEX}".split(), f"-output-directory={directory}", tex_path],
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            timeout=1
        )
    except subprocess.TimeoutExpired:
        os.remove(tex_path)
        return

    if pdflatex_process.returncode != 0:
        os.remove(tex_path)
        return

    subprocess.run(
        [
            *f"{GHOSTSCRIPT}".split(),
            "-sDEVICE=png16m", "-dTextAlphaBits=4", "-r200", "-dSAFER", "-dBATCH", "-dNOPAUSE",
            "-o", f"{filepath}.png", f"{filepath}.pdf",
        ],
        stderr=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
    )
def _generate_image_wrapper(args):
    """
    Adapter that lets generate_image be called with one packed argument
    -------
    params:
    :args: -- iterable with the positional arguments of generate_image
    """
    outcome = generate_image(*args)
    return outcome
def generate_data(examples_count, max_depth, equation_length, distribution_fraction) -> None:
    """
    Clears a directory and generates a latex dataset in given directory
    -------
    params:
    :examples_count: -- number of examples to generate; generation is
        repeated until every example has a .png file
    :max_depth: -- forwarded to generate_image
    :equation_length: -- forwarded to generate_image
    :distribution_fraction: -- forwarded to generate_image
    """
    directory = os.path.abspath(DATA_DIR)

    # On a first run the directory does not exist yet, so there is nothing
    # to clear.
    try:
        shutil.rmtree(directory)
    except FileNotFoundError:
        pass
    os.makedirs(directory)

    with open(LATEX_PATH) as file:
        latex = json.load(file)

    # Zero-padded names, all of the same width.
    width = len(str(examples_count - 1))
    filenames = {f"{i:0{width}d}" for i in range(examples_count)}
    files_before = set(os.listdir())

    while filenames:
        tasks = (
            (directory, latex, filename, max_depth, equation_length, distribution_fraction)
            for filename in sorted(filenames)
        )
        with Pool() as pool:
            # list() drains the lazy imap so that every task has finished
            # before the pool is left.
            list(tqdm.tqdm(
                pool.imap(_generate_image_wrapper, tasks),
                "Generating images",
                total=len(filenames)
            ))
        # Names that now have an image are done; the rest are retried.
        filenames -= {
            os.path.splitext(name)[0] for name in os.listdir(directory) if name.endswith(".png")
        }

    # Clean up everything in the data directory plus anything that newly
    # appeared in the current working directory.
    leftovers = {entry.path for entry in os.scandir(directory)} | (set(os.listdir()) - files_before)
    for file in leftovers:
        if file.endswith((".aux", ".pdf", ".log", ".sh")):
            os.remove(file)
|