Spaces:

breezedeus
/

Pix2Text-Demo

Running

App Files Files Community

breezedeus committed on May 4, 2024

Commit

bf1cc69

1 Parent(s): 62e4909

compatible with pix2text==1.1

Browse files

Files changed (3) hide show

app.py +59 -79
docs/examples/page.png +0 -0
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -50,11 +50,10 @@ def prepare_mfd_model():
 def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
-    analyzer_config = {}
     if 'yolov7_tiny' not in mfd_model_name:
         mfd_fp = prepare_mfd_model()
-        analyzer_config = dict(  # 声明 LayoutAnalyzer 的初始化参数
-            model_name='mfd',
             model_type='yolov7',  # 表示使用的是 YoloV7 模型，而不是 YoloV7_Tiny 模型
             model_fp=mfd_fp,  # 注：修改成你的模型文件所存储的路径
         )
@@ -63,11 +62,14 @@ def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
         formula_config = dict(  # 声明 LayoutAnalyzer 的初始化参数
             model_name='mfr-pro', model_backend='onnx',
         )
-    p2t = Pix2Text(
-        languages=lan_list,
-        analyzer_config=analyzer_config,
-        formula_config=formula_config,
     )
     return p2t
@@ -82,32 +84,41 @@ def recognize(
     lang_list = [LANGUAGES[l] for l in lang_list]
     p2t = get_p2t_model(lang_list, mfd_model_name, mfr_model_name)
-    if rec_type == 'mixed':
         suffix = list(string.ascii_letters)
         random.shuffle(suffix)
         suffix = ''.join(suffix[:6])
         out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
-        # 如果 OUTPUT_RESULT_DIR 文件数量超过 1000，按时间删除最早的 1000 个文件
-        if len(os.listdir(OUTPUT_RESULT_DIR)) > 1000:
-            for fp in sorted(os.listdir(OUTPUT_RESULT_DIR))[:1000]:
-                os.remove(OUTPUT_RESULT_DIR / fp)
-        outs = p2t.recognize(
-            image_file,
-            resized_shape=resized_shape,
-            save_analysis_res=OUTPUT_RESULT_DIR / out_det_fp,
-        )
-        # To get just the text contents, use:
-        only_text = merge_line_texts(outs, auto_line_break=True)
-        # return only_text, latex_render(only_text)
-        return only_text, str(OUTPUT_RESULT_DIR / out_det_fp)
-    elif rec_type == 'formula':
-        only_text = p2t.recognize_formula(image_file)
-        return latex_render(only_text), './docs/no-det-res.jpg'
-    elif rec_type == 'text':
-        only_text = p2t.recognize_text(image_file)
-        return only_text, './docs/no-det-res.jpg'
 def example_func(lang_list, rec_type, image_file):
@@ -127,63 +138,36 @@ def main():
     title = ': a Free Alternative to Mathpix'
     examples = [
-        [
-            ['English'],
-            'mixed',
-            'docs/examples/mixed-en.jpg',
-        ],
-        [
-            ['English', 'Chinese Simplified'],
-            'mixed',
-            'docs/examples/mixed-ch_sim.jpg',
-        ],
         [
             ['English', 'Chinese Traditional'],
-            'mixed',
             'docs/examples/mixed-ch_tra.jpg',
         ],
-        [
-            ['English', 'Vietnamese'],
-            'mixed',
-            'docs/examples/mixed-vietnamese.jpg',
-        ],
-        [
-            ['English'],
-            'formula',
-            'docs/examples/formula1.png'
-        ],
-        [
-            ['English'],
-            'formula',
-            'docs/examples/formula2.jpg'
-        ],
-        [
-            ['English'],
-            'formula',
-            'docs/examples/hw-formula.png'
-        ],
-        [
-            ['English', 'Chinese Simplified'],
-            'text',
-            'docs/examples/pure-text.jpg',
-        ],
     ]
     table_desc = """
 <div align="center">
-<img src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2F9341931a-53f0-48e1-b026-0f1ad17b457c%2Fd0e55da8-36a5-482c-bea6-c389e2fcacea%2FUntitled.png?table=block&id=caebb37a-e23f-49ab-9687-2cba3801992e" width="120px"/>
-[![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo)
-[![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/drT8H85Y)
 |                                 |                                         |
 | ------------------------------- | --------------------------------------- |
 | 🏄 **Online Service**             | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
 | 📀 **Code**              | [Github](https://github.com/breezedeus/pix2text) |
-| 🤗 **MFR Model**              | [breezedeus/pix2text-mfr](https://huggingface.co/breezedeus/pix2text-mfr) |
-| 📄 **More Infos**              | [breezedeus.com/pix2text](https://www.breezedeus.com/pix2text) |
-| 👨🏻‍💻 **Author**            | [Breezedeus](https://www.breezedeus.com) |
 If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
 </div>
@@ -191,7 +175,7 @@ If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/p
     with gr.Blocks() as demo:
         gr.HTML(
-            f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.0</a>{title}</h1>'
         )
         with gr.Row(equal_height=False):
             with gr.Column(min_width=200, variant='panel', scale=3):
@@ -214,9 +198,9 @@ If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/p
                     value='mfr-pro (paid)',
                 )
                 rec_type = gr.Dropdown(
-                    label='Image Type',
-                    choices=['mixed', 'formula', 'text'],
-                    value='mixed',
                     # info='Which type of image to be recognized.',
                 )
                 with gr.Accordion('More Options', open=False):
@@ -273,11 +257,7 @@ If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/p
         gr.Examples(
             label='Examples',
             examples=examples,
-            inputs=[
-                lang_list,
-                rec_type,
-                image_file,
-            ],
             outputs=[rec_result, det_result],
             fn=example_func,
             cache_examples=os.getenv('CACHE_EXAMPLES') == '1',

 def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
+    mfd_config = {}
     if 'yolov7_tiny' not in mfd_model_name:
         mfd_fp = prepare_mfd_model()
+        mfd_config = dict(  # 声明 LayoutAnalyzer 的初始化参数
             model_type='yolov7',  # 表示使用的是 YoloV7 模型，而不是 YoloV7_Tiny 模型
             model_fp=mfd_fp,  # 注：修改成你的模型文件所存储的路径
         )
         formula_config = dict(  # 声明 LayoutAnalyzer 的初始化参数
             model_name='mfr-pro', model_backend='onnx',
         )
+    text_formula_config = dict(
+        languages=lan_list, mfd=mfd_config, formula=formula_config,
     )
+    total_config = {
+        'layout': {'scores_thresh': 0.45},
+        'text_formula': text_formula_config,
+    }
+    p2t = Pix2Text.from_config(total_configs=total_config,)
     return p2t
     lang_list = [LANGUAGES[l] for l in lang_list]
     p2t = get_p2t_model(lang_list, mfd_model_name, mfr_model_name)
+    # If OUTPUT_RESULT_DIR holds more than 100 entries, remove the whole directory and recreate it empty
+    if len(os.listdir(OUTPUT_RESULT_DIR)) > 100:
+        shutil.rmtree(OUTPUT_RESULT_DIR)
+        OUTPUT_RESULT_DIR.mkdir(exist_ok=True)
+    out_det_fp = './docs/no-det-res.jpg'
+    kwargs = dict(
+        resized_shape=resized_shape,
+        return_text = True,
+        auto_line_break = True,
+    )
+    if rec_type == 'page':
+        suffix = list(string.ascii_letters)
+        random.shuffle(suffix)
+        suffix = ''.join(suffix[:6])
+        fp_suffix = f'{time.time()}-{suffix}'
+        out_debug_dir = f'out-debug-{fp_suffix}'
+        output_dir = OUTPUT_RESULT_DIR / f'output-{fp_suffix}'
+        kwargs['save_debug_res'] = OUTPUT_RESULT_DIR / out_debug_dir
+    elif rec_type == 'text_formula':
         suffix = list(string.ascii_letters)
         random.shuffle(suffix)
         suffix = ''.join(suffix[:6])
         out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
+        kwargs['save_analysis_res'] = str(OUTPUT_RESULT_DIR / out_det_fp)
+    out = p2t.recognize(image_file, file_type=rec_type, **kwargs)
+    out_text = out
+    if rec_type == 'page':
+        out_text = out.to_markdown(output_dir)
+        out_det_fp =kwargs['save_debug_res'] / 'layout_res.jpg'
+    elif rec_type == 'text_formula':
+        out_det_fp = kwargs['save_analysis_res']
+    return out_text, out_det_fp
 def example_func(lang_list, rec_type, image_file):
     title = ': a Free Alternative to Mathpix'
     examples = [
+        [['English'], 'page', 'docs/examples/page.png',],
+        [['English'], 'text_formula', 'docs/examples/mixed-en.jpg',],
+        [['English', 'Chinese Simplified'], 'text_formula', 'docs/examples/mixed-ch_sim.jpg',],
         [
             ['English', 'Chinese Traditional'],
+            'text_formula',
             'docs/examples/mixed-ch_tra.jpg',
         ],
+        [['English', 'Vietnamese'], 'text_formula', 'docs/examples/mixed-vietnamese.jpg',],
+        [['English'], 'formula', 'docs/examples/formula1.png'],
+        [['English'], 'formula', 'docs/examples/formula2.jpg'],
+        [['English'], 'formula', 'docs/examples/hw-formula.png'],
+        [['English', 'Chinese Simplified'], 'text', 'docs/examples/pure-text.jpg',],
     ]
     table_desc = """
 <div align="center">
+<img src="https://pix2text.readthedocs.io/zh/latest/figs/p2t-logo.png" width="120px"/>
+[![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo)
+[![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/GgD87WM8Tf)
 |                                 |                                         |
 | ------------------------------- | --------------------------------------- |
 | 🏄 **Online Service**             | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
+| 📖 **Doc**              | [Online Doc](https://pix2text.readthedocs.io) |
 | 📀 **Code**              | [Github](https://github.com/breezedeus/pix2text) |
+| 🤗 **Models**              | [breezedeus/Models](https://huggingface.co/breezedeus) |
+| 📄 **More Infos**              | [Pix2Text Infos](https://www.breezedeus.com/article/pix2text) |
 If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
 </div>
     with gr.Blocks() as demo:
         gr.HTML(
+            f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.1</a>{title}</h1>'
         )
         with gr.Row(equal_height=False):
             with gr.Column(min_width=200, variant='panel', scale=3):
                     value='mfr-pro (paid)',
                 )
                 rec_type = gr.Dropdown(
+                    label='File Type',
+                    choices=['page', 'text_formula', 'formula', 'text'],
+                    value='text_formula',
                     # info='Which type of image to be recognized.',
                 )
                 with gr.Accordion('More Options', open=False):
         gr.Examples(
             label='Examples',
             examples=examples,
+            inputs=[lang_list, rec_type, image_file,],
             outputs=[rec_result, det_result],
             fn=example_func,
             cache_examples=os.getenv('CACHE_EXAMPLES') == '1',

docs/examples/page.png ADDED Viewed

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 --extra-index-url https://pypi.org/simple
 pyyaml
-pix2text[multilingual]>=1.0

 --extra-index-url https://pypi.org/simple
 pyyaml
+pix2text[multilingual]>=1.1