breezedeus committed on
Commit
bf1cc69
1 Parent(s): 62e4909

compatible with pix2text==1.1

Browse files
Files changed (3) hide show
  1. app.py +59 -79
  2. docs/examples/page.png +0 -0
  3. requirements.txt +1 -1
app.py CHANGED
@@ -50,11 +50,10 @@ def prepare_mfd_model():
50
 
51
 
52
  def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
53
- analyzer_config = {}
54
  if 'yolov7_tiny' not in mfd_model_name:
55
  mfd_fp = prepare_mfd_model()
56
- analyzer_config = dict( # 声明 LayoutAnalyzer 的初始化参数
57
- model_name='mfd',
58
  model_type='yolov7', # 表示使用的是 YoloV7 模型,而不是 YoloV7_Tiny 模型
59
  model_fp=mfd_fp, # 注:修改成你的模型文件所存储的路径
60
  )
@@ -63,11 +62,14 @@ def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
63
  formula_config = dict( # 声明 LayoutAnalyzer 的初始化参数
64
  model_name='mfr-pro', model_backend='onnx',
65
  )
66
- p2t = Pix2Text(
67
- languages=lan_list,
68
- analyzer_config=analyzer_config,
69
- formula_config=formula_config,
70
  )
 
 
 
 
 
71
  return p2t
72
 
73
 
@@ -82,32 +84,41 @@ def recognize(
82
  lang_list = [LANGUAGES[l] for l in lang_list]
83
  p2t = get_p2t_model(lang_list, mfd_model_name, mfr_model_name)
84
 
85
- if rec_type == 'mixed':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  suffix = list(string.ascii_letters)
87
  random.shuffle(suffix)
88
  suffix = ''.join(suffix[:6])
89
  out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
90
- # 如果 OUTPUT_RESULT_DIR 文件数量超过 1000,按时间删除最早的 1000 个文件
91
- if len(os.listdir(OUTPUT_RESULT_DIR)) > 1000:
92
- for fp in sorted(os.listdir(OUTPUT_RESULT_DIR))[:1000]:
93
- os.remove(OUTPUT_RESULT_DIR / fp)
94
-
95
- outs = p2t.recognize(
96
- image_file,
97
- resized_shape=resized_shape,
98
- save_analysis_res=OUTPUT_RESULT_DIR / out_det_fp,
99
- )
100
- # To get just the text contents, use:
101
- only_text = merge_line_texts(outs, auto_line_break=True)
102
 
103
- # return only_text, latex_render(only_text)
104
- return only_text, str(OUTPUT_RESULT_DIR / out_det_fp)
105
- elif rec_type == 'formula':
106
- only_text = p2t.recognize_formula(image_file)
107
- return latex_render(only_text), './docs/no-det-res.jpg'
108
- elif rec_type == 'text':
109
- only_text = p2t.recognize_text(image_file)
110
- return only_text, './docs/no-det-res.jpg'
111
 
112
 
113
  def example_func(lang_list, rec_type, image_file):
@@ -127,63 +138,36 @@ def main():
127
 
128
  title = ': a Free Alternative to Mathpix'
129
  examples = [
130
- [
131
- ['English'],
132
- 'mixed',
133
- 'docs/examples/mixed-en.jpg',
134
- ],
135
- [
136
- ['English', 'Chinese Simplified'],
137
- 'mixed',
138
- 'docs/examples/mixed-ch_sim.jpg',
139
- ],
140
  [
141
  ['English', 'Chinese Traditional'],
142
- 'mixed',
143
  'docs/examples/mixed-ch_tra.jpg',
144
  ],
145
- [
146
- ['English', 'Vietnamese'],
147
- 'mixed',
148
- 'docs/examples/mixed-vietnamese.jpg',
149
- ],
150
- [
151
- ['English'],
152
- 'formula',
153
- 'docs/examples/formula1.png'
154
- ],
155
- [
156
- ['English'],
157
- 'formula',
158
- 'docs/examples/formula2.jpg'
159
- ],
160
- [
161
- ['English'],
162
- 'formula',
163
- 'docs/examples/hw-formula.png'
164
- ],
165
- [
166
- ['English', 'Chinese Simplified'],
167
- 'text',
168
- 'docs/examples/pure-text.jpg',
169
- ],
170
  ]
171
 
172
  table_desc = """
173
  <div align="center">
174
- <img src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2F9341931a-53f0-48e1-b026-0f1ad17b457c%2Fd0e55da8-36a5-482c-bea6-c389e2fcacea%2FUntitled.png?table=block&id=caebb37a-e23f-49ab-9687-2cba3801992e" width="120px"/>
175
 
176
- [![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2FCnOCR-Demo)
177
 
178
- [![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/drT8H85Y)
179
 
180
  | | |
181
  | ------------------------------- | --------------------------------------- |
182
  | 🏄 **Online Service** | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
 
183
  | 📀 **Code** | [Github](https://github.com/breezedeus/pix2text) |
184
- | 🤗 **MFR Model** | [breezedeus/pix2text-mfr](https://huggingface.co/breezedeus/pix2text-mfr) |
185
- | 📄 **More Infos** | [breezedeus.com/pix2text](https://www.breezedeus.com/pix2text) |
186
- | 👨🏻‍💻 **Author** | [Breezedeus](https://www.breezedeus.com) |
187
 
188
  If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
189
  </div>
@@ -191,7 +175,7 @@ If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/p
191
 
192
  with gr.Blocks() as demo:
193
  gr.HTML(
194
- f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.0</a>{title}</h1>'
195
  )
196
  with gr.Row(equal_height=False):
197
  with gr.Column(min_width=200, variant='panel', scale=3):
@@ -214,9 +198,9 @@ If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/p
214
  value='mfr-pro (paid)',
215
  )
216
  rec_type = gr.Dropdown(
217
- label='Image Type',
218
- choices=['mixed', 'formula', 'text'],
219
- value='mixed',
220
  # info='Which type of image to be recognized.',
221
  )
222
  with gr.Accordion('More Options', open=False):
@@ -273,11 +257,7 @@ If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/p
273
  gr.Examples(
274
  label='Examples',
275
  examples=examples,
276
- inputs=[
277
- lang_list,
278
- rec_type,
279
- image_file,
280
- ],
281
  outputs=[rec_result, det_result],
282
  fn=example_func,
283
  cache_examples=os.getenv('CACHE_EXAMPLES') == '1',
 
50
 
51
 
52
  def get_p2t_model(lan_list: list, mfd_model_name: str, mfr_model_name: str):
53
+ mfd_config = {}
54
  if 'yolov7_tiny' not in mfd_model_name:
55
  mfd_fp = prepare_mfd_model()
56
+ mfd_config = dict( # 声明 LayoutAnalyzer 的初始化参数
 
57
  model_type='yolov7', # 表示使用的是 YoloV7 模型,而不是 YoloV7_Tiny 模型
58
  model_fp=mfd_fp, # 注:修改成你的模型文件所存储的路径
59
  )
 
62
  formula_config = dict( # 声明 LayoutAnalyzer 的初始化参数
63
  model_name='mfr-pro', model_backend='onnx',
64
  )
65
+ text_formula_config = dict(
66
+ languages=lan_list, mfd=mfd_config, formula=formula_config,
 
 
67
  )
68
+ total_config = {
69
+ 'layout': {'scores_thresh': 0.45},
70
+ 'text_formula': text_formula_config,
71
+ }
72
+ p2t = Pix2Text.from_config(total_configs=total_config,)
73
  return p2t
74
 
75
 
 
84
  lang_list = [LANGUAGES[l] for l in lang_list]
85
  p2t = get_p2t_model(lang_list, mfd_model_name, mfr_model_name)
86
 
87
+ # If OUTPUT_RESULT_DIR holds more than 100 entries, remove the entire directory (it is recreated right after)
88
+ if len(os.listdir(OUTPUT_RESULT_DIR)) > 100:
89
+ shutil.rmtree(OUTPUT_RESULT_DIR)
90
+ OUTPUT_RESULT_DIR.mkdir(exist_ok=True)
91
+
92
+ out_det_fp = './docs/no-det-res.jpg'
93
+ kwargs = dict(
94
+ resized_shape=resized_shape,
95
+ return_text = True,
96
+ auto_line_break = True,
97
+ )
98
+ if rec_type == 'page':
99
+ suffix = list(string.ascii_letters)
100
+ random.shuffle(suffix)
101
+ suffix = ''.join(suffix[:6])
102
+ fp_suffix = f'{time.time()}-{suffix}'
103
+ out_debug_dir = f'out-debug-{fp_suffix}'
104
+ output_dir = OUTPUT_RESULT_DIR / f'output-{fp_suffix}'
105
+ kwargs['save_debug_res'] = OUTPUT_RESULT_DIR / out_debug_dir
106
+ elif rec_type == 'text_formula':
107
  suffix = list(string.ascii_letters)
108
  random.shuffle(suffix)
109
  suffix = ''.join(suffix[:6])
110
  out_det_fp = f'out-det-{time.time()}-{suffix}.jpg'
111
+ kwargs['save_analysis_res'] = str(OUTPUT_RESULT_DIR / out_det_fp)
112
+
113
+ out = p2t.recognize(image_file, file_type=rec_type, **kwargs)
114
+ out_text = out
115
+ if rec_type == 'page':
116
+ out_text = out.to_markdown(output_dir)
117
+ out_det_fp =kwargs['save_debug_res'] / 'layout_res.jpg'
118
+ elif rec_type == 'text_formula':
119
+ out_det_fp = kwargs['save_analysis_res']
 
 
 
120
 
121
+ return out_text, out_det_fp
 
 
 
 
 
 
 
122
 
123
 
124
  def example_func(lang_list, rec_type, image_file):
 
138
 
139
  title = ': a Free Alternative to Mathpix'
140
  examples = [
141
+ [['English'], 'page', 'docs/examples/page.png',],
142
+ [['English'], 'text_formula', 'docs/examples/mixed-en.jpg',],
143
+ [['English', 'Chinese Simplified'], 'text_formula', 'docs/examples/mixed-ch_sim.jpg',],
 
 
 
 
 
 
 
144
  [
145
  ['English', 'Chinese Traditional'],
146
+ 'text_formula',
147
  'docs/examples/mixed-ch_tra.jpg',
148
  ],
149
+ [['English', 'Vietnamese'], 'text_formula', 'docs/examples/mixed-vietnamese.jpg',],
150
+ [['English'], 'formula', 'docs/examples/formula1.png'],
151
+ [['English'], 'formula', 'docs/examples/formula2.jpg'],
152
+ [['English'], 'formula', 'docs/examples/hw-formula.png'],
153
+ [['English', 'Chinese Simplified'], 'text', 'docs/examples/pure-text.jpg',],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  ]
155
 
156
  table_desc = """
157
  <div align="center">
158
+ <img src="https://pix2text.readthedocs.io/zh/latest/figs/p2t-logo.png" width="120px"/>
159
 
160
+ [![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo&labelColor=%23697689&countColor=%23f5c791&style=flat&labelStyle=upper)](https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbreezedeus%2Fpix2text-demo)
161
 
162
+ [![Discord](https://img.shields.io/discord/1200765964434821260?logo=discord&label=Discord)](https://discord.gg/GgD87WM8Tf)
163
 
164
  | | |
165
  | ------------------------------- | --------------------------------------- |
166
  | 🏄 **Online Service** | [p2t.breezedeus.com](https://p2t.breezedeus.com) |
167
+ | 📖 **Doc** | [Online Doc](https://pix2text.readthedocs.io) |
168
  | 📀 **Code** | [Github](https://github.com/breezedeus/pix2text) |
169
+ | 🤗 **Models** | [breezedeus/Models](https://huggingface.co/breezedeus) |
170
+ | 📄 **More Infos** | [Pix2Text Infos](https://www.breezedeus.com/article/pix2text) |
 
171
 
172
  If useful, please help to **star 🌟 [Pix2Text](https://github.com/breezedeus/pix2text)** 🙏
173
  </div>
 
175
 
176
  with gr.Blocks() as demo:
177
  gr.HTML(
178
+ f'<h1 style="text-align: center; margin-bottom: 1rem;"><a href="https://github.com/breezedeus/pix2text" target="_blank">Pix2Text V1.1</a>{title}</h1>'
179
  )
180
  with gr.Row(equal_height=False):
181
  with gr.Column(min_width=200, variant='panel', scale=3):
 
198
  value='mfr-pro (paid)',
199
  )
200
  rec_type = gr.Dropdown(
201
+ label='File Type',
202
+ choices=['page', 'text_formula', 'formula', 'text'],
203
+ value='text_formula',
204
  # info='Which type of image to be recognized.',
205
  )
206
  with gr.Accordion('More Options', open=False):
 
257
  gr.Examples(
258
  label='Examples',
259
  examples=examples,
260
+ inputs=[lang_list, rec_type, image_file,],
 
 
 
 
261
  outputs=[rec_result, det_result],
262
  fn=example_func,
263
  cache_examples=os.getenv('CACHE_EXAMPLES') == '1',
docs/examples/page.png ADDED
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  --extra-index-url https://pypi.org/simple
2
 
3
  pyyaml
4
- pix2text[multilingual]>=1.0
 
1
  --extra-index-url https://pypi.org/simple
2
 
3
  pyyaml
4
+ pix2text[multilingual]>=1.1