|
|
|
|
|
|
|
|
|
import os |
|
import json |
|
import functools |
|
import random |
|
import shutil |
|
import string |
|
import tempfile |
|
import time |
|
import zipfile |
|
from pathlib import Path |
|
|
|
import yaml |
|
|
|
import gradio as gr |
|
import numpy as np |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
|
|
from pix2text import Pix2Text |
|
from pix2text.utils import set_logger, merge_line_texts |
|
|
|
logger = set_logger()

# Mapping loaded from the `languages` section of languages.yaml. Its keys are
# offered as the language choices in the UI and its values are what gets
# passed on to Pix2Text. The file is read inside a `with` block so the handle
# is closed as soon as parsing finishes.
with open('languages.yaml', 'r', encoding='utf-8') as _languages_file:
    LANGUAGES = yaml.safe_load(_languages_file)['languages']

# Directory that receives per-request output files; created at import time.
OUTPUT_RESULT_DIR = Path('./output-results')
OUTPUT_RESULT_DIR.mkdir(exist_ok=True)
|
|
|
|
|
def prepare_mfd_model():
    """Return the local path of the YOLOv7 MFD weights, fetching them if absent.

    When the weights file is not on disk yet, the model archive is downloaded
    from the Hugging Face Hub (using the ``HF_TOKEN`` environment variable as
    the access token, if it is set) and unpacked into the current directory.

    Returns:
        str: Relative path of the weights file.
    """
    target_fp = './yolov7-model/mfd-yolov7-epoch224-20230613.pt'
    if not os.path.exists(target_fp):
        archive_path = hf_hub_download(
            repo_id='breezedeus/paid-models',
            subfolder='cnstd/1.2',
            filename='yolov7-model-20230613.zip',
            repo_type="model",
            cache_dir='./',
            token=os.environ.get('HF_TOKEN'),
        )
        # Presumably the archive unpacks to `target_fp`; this is not checked here.
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall('./')
    return target_fp
|
|
|
|
|
def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
    """Build a Pix2Text instance for the chosen languages and models.

    Args:
        lan_list: Languages placed under ``languages`` in the text_formula config.
        mfd_model_name: MFD model choice. Any name that does not contain
            'yolov7_tiny' selects the downloadable YOLOv7 weights.
        mfr_model_name: MFR model choice. Names containing 'mfr-pro' select
            the 'mfr-pro' model with the ONNX backend.

    Returns:
        The object produced by ``Pix2Text.from_config``.
    """
    wants_tiny_mfd = 'yolov7_tiny' in mfd_model_name
    # An empty config leaves the choice of MFD model to Pix2Text.
    mfd_config = (
        {}
        if wants_tiny_mfd
        else {'model_type': 'yolov7', 'model_fp': prepare_mfd_model()}
    )

    wants_pro_mfr = 'mfr-pro' in mfr_model_name
    formula_config = (
        {'model_name': 'mfr-pro', 'model_backend': 'onnx'} if wants_pro_mfr else {}
    )

    total_config = {
        'layout': {'scores_thresh': 0.45},
        'text_formula': {
            'languages': lan_list,
            'mfd': mfd_config,
            'formula': formula_config,
        },
    }
    return Pix2Text.from_config(total_configs=total_config)
|
|
|
|
|
def latex_render(latex_str):
    """Wrap *latex_str* in ``$$`` delimiters, each on its own line."""
    template = "$$\n{}\n$$"
    return template.format(latex_str)
|
|
|
|
|
|
|
def _random_suffix(length=6):
    """Return *length* distinct random ASCII letters for unique output names."""
    return ''.join(random.sample(string.ascii_letters, length))


def recognize(
    lang_list, mfd_model_name, mfr_model_name, rec_type, resized_shape, image_file
):
    """Run Pix2Text on an image and return the recognized text and a result image.

    Args:
        lang_list: Language names; each must be a key of ``LANGUAGES``.
        mfd_model_name: MFD model choice, forwarded to ``get_p2t_model``.
        mfr_model_name: MFR model choice, forwarded to ``get_p2t_model``.
        rec_type: Recognition mode, passed to Pix2Text as ``file_type``.
            'page' and 'text_formula' additionally save a result image.
        resized_shape: Forwarded to Pix2Text as ``resized_shape``.
        image_file: The image to recognize.

    Returns:
        tuple: ``(out_text, out_det_fp)``. For 'page', ``out_text`` is the
        result of ``to_markdown`` and ``out_det_fp`` points at
        ``layout_res.jpg`` in the debug directory. For 'text_formula',
        ``out_det_fp`` is the saved analysis image. For every other mode it
        is the placeholder './docs/no-det-res.jpg'.

    Raises:
        gr.Error: If no image was provided.
        KeyError: If a name in ``lang_list`` is not a key of ``LANGUAGES``.
    """
    # Fail fast with a user-visible message instead of building the model and
    # then crashing inside the recognizer on a missing image.
    if image_file is None:
        raise gr.Error('Please upload an image before submitting.')

    lang_codes = [LANGUAGES[name] for name in lang_list]
    p2t = get_p2t_model(lang_codes, mfd_model_name, mfr_model_name)

    # Keep the output directory from growing without bound: once it holds more
    # than 100 entries, wipe and recreate it.
    # NOTE(review): this also removes outputs of earlier requests - confirm
    # that nothing still needs them.
    if len(os.listdir(OUTPUT_RESULT_DIR)) > 100:
        shutil.rmtree(OUTPUT_RESULT_DIR)
        OUTPUT_RESULT_DIR.mkdir(exist_ok=True)

    # Placeholder shown for modes that do not save a result image.
    out_det_fp = './docs/no-det-res.jpg'
    kwargs = dict(
        resized_shape=resized_shape,
        return_text=True,
        auto_line_break=True,
    )
    if rec_type == 'page':
        # Timestamp plus random letters keeps output names unique per request.
        fp_suffix = f'{time.time()}-{_random_suffix()}'
        output_dir = OUTPUT_RESULT_DIR / f'output-{fp_suffix}'
        kwargs['save_debug_res'] = OUTPUT_RESULT_DIR / f'out-debug-{fp_suffix}'
    elif rec_type == 'text_formula':
        out_det_fp = f'out-det-{time.time()}-{_random_suffix()}.jpg'
        kwargs['save_analysis_res'] = str(OUTPUT_RESULT_DIR / out_det_fp)

    out = p2t.recognize(image_file, file_type=rec_type, **kwargs)
    out_text = out
    if rec_type == 'page':
        out_text = out.to_markdown(output_dir)
        out_det_fp = kwargs['save_debug_res'] / 'layout_res.jpg'
    elif rec_type == 'text_formula':
        out_det_fp = kwargs['save_analysis_res']

    return out_text, out_det_fp
|
|
|
|
|
def example_func(lang_list, rec_type, image_file):
    """Run ``recognize`` for an example entry with fixed model settings.

    The examples only supply the languages, the recognition mode and the
    image; the models and the resize value are pinned here.
    """
    fixed_settings = {
        'mfd_model_name': 'yolov7 (paid)',
        'mfr_model_name': 'mfr-pro (paid)',
        'resized_shape': 768,
    }
    return recognize(
        lang_list, rec_type=rec_type, image_file=image_file, **fixed_settings
    )
|
|
|
|
|
def main():
    """Build the Gradio demo UI for Pix2Text and launch it.

    The page has a settings column (languages, MFD/MFR models, file type and
    resize option), an image upload column with a submit button, a project
    links panel, and a results row showing the detection image and the
    recognized text. Submitting calls ``recognize``; the example gallery calls
    ``example_func``. Example outputs are cached only when the environment
    variable ``CACHE_EXAMPLES`` equals '1'.
    """
    # Language names in case-insensitive alphabetical order for the dropdown.
    langs = sorted(LANGUAGES, key=str.lower)

    title = ': a Free Alternative to Mathpix'
    # Each example row is [languages, file type, image path], matching the
    # `inputs` given to gr.Examples below.
    examples = [
        [['English'], 'page', 'docs/examples/page.png'],
        [['English'], 'text_formula', 'docs/examples/mixed-en.jpg'],
        [
            ['English', 'Chinese Simplified'],
            'text_formula',
            'docs/examples/mixed-ch_sim.jpg',
        ],
        [
            ['English', 'Chinese Traditional'],
            'text_formula',
            'docs/examples/mixed-ch_tra.jpg',
        ],
        [
            ['English', 'Vietnamese'],
            'text_formula',
            'docs/examples/mixed-vietnamese.jpg',
        ],
        [['English'], 'formula', 'docs/examples/formula1.png'],
        [['English'], 'formula', 'docs/examples/formula2.jpg'],
        [['English'], 'formula', 'docs/examples/hw-formula.png'],
        [['English', 'Chinese Simplified'], 'text', 'docs/examples/pure-text.jpg'],
    ]

    # Markdown for the links panel. The content stays at column 0 because
    # indented lines would be rendered as a code block.
    table_desc = """
<div align="center">
<img src="https://pix2text.readthedocs.io/zh/latest/figs/p2t-logo.png" width="120px"/>

[Visitors](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo)

[Discord](https://discord.gg/GgD87WM8Tf)

| | |
| ------------------------------- | --------------------------------------- |
| **Online Service** | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
| **Doc** | [Online Doc](https://pix2text.readthedocs.io) |
| **Code** | [Github](https://github.com/breezedeus/pix2text) |
| **Models** | [breezedeus/Models](https://huggingface.co/breezedeus) |
| **More Infos** | [Pix2Text Infos](https://www.breezedeus.com/article/pix2text) |

If useful, please help to **star [Pix2Text](https://github.com/breezedeus/pix2text)**
</div>
"""

    with gr.Blocks() as demo:
        gr.HTML(
            f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.1</a>{title}</h1>'
        )
        with gr.Row(equal_height=False):
            with gr.Column(min_width=200, variant='panel', scale=3):
                gr.Markdown('### Settings')
                lang_list = gr.Dropdown(
                    label='Text Languages',
                    choices=langs,
                    value=['English', 'Chinese Simplified'],
                    multiselect=True,
                )
                mfd_model_name = gr.Dropdown(
                    label='MFD Models',
                    choices=['yolov7_tiny (free)', 'yolov7 (paid)'],
                    value='yolov7 (paid)',
                )
                mfr_model_name = gr.Dropdown(
                    label='MFR Models',
                    choices=['mfr (free)', 'mfr-pro (paid)'],
                    value='mfr-pro (paid)',
                )
                rec_type = gr.Dropdown(
                    label='File Type',
                    choices=['page', 'text_formula', 'formula', 'text'],
                    value='text_formula',
                )
                with gr.Accordion('More Options', open=False):
                    resized_shape = gr.Slider(
                        label='resized_shape',
                        minimum=512,
                        maximum=2048,
                        value=768,
                        step=32,
                    )

            with gr.Column(scale=6, variant='compact'):
                gr.Markdown('### Upload Image to be Recognized')
                image_file = gr.Image(
                    label='Image', type="pil", image_mode='RGB', show_label=False
                )
                sub_btn = gr.Button("Submit", variant="primary")

            with gr.Column(scale=2, variant='compact'):
                gr.Markdown(table_desc)

        with gr.Row(equal_height=False):
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown('**Detection Result**')
                det_result = gr.Image(
                    label='Detection Result', scale=1, show_label=False
                )
            with gr.Column(scale=1, variant='compact'):
                gr.Markdown(
                    '**Recognition Results (Paste them into the [P2T Online Service](https://p2t.breezedeus.com) to view rendered outcomes)**'
                )
                rec_result = gr.Textbox(
                    label='Recognition Result ',
                    lines=5,
                    value='',
                    scale=1,
                    show_label=False,
                    show_copy_button=True,
                )

        # The input order must match the parameter order of `recognize`.
        sub_btn.click(
            recognize,
            inputs=[
                lang_list,
                mfd_model_name,
                mfr_model_name,
                rec_type,
                resized_shape,
                image_file,
            ],
            outputs=[rec_result, det_result],
        )

        gr.Examples(
            label='Examples',
            examples=examples,
            inputs=[lang_list, rec_type, image_file],
            outputs=[rec_result, det_result],
            fn=example_func,
            cache_examples=os.getenv('CACHE_EXAMPLES') == '1',
        )

    demo.queue(max_size=10)
    demo.launch()
|
|
|
|
|
# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|