zuminghuang KexuanRen committed on
Commit
544bc94
·
1 Parent(s): 5410761

Update app.py (#11)

Browse files

- Update app.py (f1998d231971cfd16cc584809a24b5f7c631df22)


Co-authored-by: KEYE <KexuanRen@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +19 -554
app.py CHANGED
@@ -1,565 +1,30 @@
1
  import os
2
- import re
3
  import sys
4
- import json
5
- import time
6
- import copy
7
- import base64
8
- import asyncio
9
- import tempfile
10
  import subprocess
11
- from pathlib import Path
12
- from datetime import datetime
13
- import zipfile
14
- import httpx, aiofiles, os, asyncio
15
- import numpy as np
16
- import gradio as gr
17
- from PIL import Image
18
- from pdf2image import convert_from_path
19
- from loguru import logger
20
- from openai import OpenAI, AsyncOpenAI
21
- from gradio_pdf import PDF
22
- import certifi
23
- import httpx
24
- import aiohttp
25
- import uuid
26
- import tqdm
27
- import base64, pathlib
28
- from io import BytesIO
29
- from pdf2image import convert_from_bytes, convert_from_path # pip install pdf2image
30
 
31
- import requests
 
 
 
32
 
33
-
34
def setup_poppler_linux():
    """Install poppler-utils (needed by pdf2image) on the host via apt.

    A sentinel directory under /tmp marks that installation was attempted.
    NOTE(review): diff formatting makes the guard's extent ambiguous — the
    apt-get calls are assumed to run only on first setup; confirm.
    """
    marker = "/tmp/poppler"
    if not os.path.exists(marker):
        os.makedirs(marker, exist_ok=True)
        # Drop any stale nodesource apt entries that would break `apt-get update`.
        subprocess.run(["bash", "-lc", "rm -f /etc/apt/sources.list.d/*nodesource*.list || true"], check=False)
        subprocess.run(["apt-get", "update"], check=True)
        subprocess.run(["apt-get", "install", "-y", "poppler-utils"], check=True)

setup_poppler_linux()
47
-
48
-
49
-
50
# Prompt presets offered in the UI dropdown; all request Markdown conversion.
preset_prompts = [
    "Please convert the document into Markdown format.",
    "Generate a clean and structured Markdown version of the document.",
    "Transform this content into Markdown with proper headings and bullet points.",
    "Convert the text to Markdown, preserving structure and formatting.",
    "Reformat this document as Markdown with clear sections and lists.",
]

# The backend does not validate the OpenAI key ("EMPTY"); real auth is the
# bearer token carried in the Authorization header below.
openai_api_key = "EMPTY"
openai_api_base = os.environ.get("infinity_parser1_api")
Authorization = os.environ.get("infinity_parser1_Authorization")

# Registry of selectable models: served model name, async OpenAI-compatible
# client, and the bearer token to send per request. All values come from env.
AVAILABLE_MODELS = {
    "Infinity-Parser-7B": {
        "name": os.environ.get("infinity_parser1_name"),
        "client": AsyncOpenAI(
            api_key=openai_api_key,
            base_url=os.environ.get("infinity_parser1_api") + "/v1",
        ),
        "Authorization": os.environ.get("infinity_parser1_Authorization")
    },
    "Infinity-Parser2-30B-A3B-Preview": {
        "name": os.environ.get("infinity_parser2_name"),
        "client": AsyncOpenAI(
            api_key=openai_api_key,
            base_url=os.environ.get("infinity_parser2_api") + "/v1",
        ),
        "Authorization": os.environ.get("infinity_parser2_Authorization")
    }
}
81
-
82
def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
    """Synchronously POST a PDF to the parsing backend.

    Args:
        file_path: Local path of the PDF to upload.
        server_ip: Base URL of the backend. Falls back to the module-level
            ``openai_api_base`` when falsy (preserves prior behavior).
        port: Optional port appended to ``server_ip`` when both are given.
        route: Endpoint path, default "/upload".
        api_key: Optional bearer token for the Authorization header.

    Returns:
        The ``requests.Response`` from the backend.
    """
    # BUG FIX: server_ip and port were accepted but silently ignored — the URL
    # was always built from the global openai_api_base.
    base = server_ip or openai_api_base
    if server_ip and port:
        base = f"{server_ip}:{port}"
    url = f"{base}{route}"

    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    with open(file_path, "rb") as f:
        files = {"file": (os.path.basename(file_path), f, "application/pdf")}
        response = requests.post(url, files=files, headers=headers)
    return response
92
-
93
async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
    """Asynchronously POST a PDF to {server_ip}{route} using aiohttp.

    Returns the response object on success, None on any failure.
    NOTE(review): the response is returned after the session context has
    exited, so its body is no longer readable by the caller — confirm intended.
    """
    url = f"{server_ip}{route}"
    headers = {"Authorization": f"Bearer {Authorization}"} if Authorization else {}

    try:
        async with aiohttp.ClientSession() as session:
            with open(file_path, "rb") as fh:
                form = aiohttp.FormData()
                form.add_field('file', fh, filename=os.path.basename(file_path), content_type='application/pdf')
                async with session.post(url, data=form, headers=headers) as response:
                    print(f"PDF发送成功: {file_path}, 状态码: {response.status}")
                    return response
    except Exception as e:
        print(f"PDF发送失败: {file_path}, 错误: {e}")
        return None
111
-
112
-
113
def extract_makrdown(text):
    """Return the body of the first ```markdown fenced block in *text*.

    Falls back to returning *text* unchanged when no such fence exists.
    (Name keeps the original's typo for caller compatibility.)
    """
    match = re.search(r'```markdown\s*([\s\S]*?)```', text, re.MULTILINE)
    return match.group(1).strip() if match else text
119
-
120
-
121
async def request(messages, model_name, client, Authorization):
    """Stream chat-completion text deltas for *messages* from *client*.

    Yields each non-empty content delta as it arrives; stops early when the
    server reports a finish_reason. Deterministic decoding (temperature 0).
    """
    stream = await client.chat.completions.create(
        messages=messages,
        extra_headers={"Authorization": f"Bearer {Authorization}"},
        model=model_name,
        max_completion_tokens=4096,
        stream=True,
        temperature=0.0,
        top_p=0.95,
    )

    page = ""
    async for chunk in stream:
        if chunk.choices[0].delta.content:
            content = chunk.choices[0].delta.content
            choice = chunk.choices[0]
            if choice.finish_reason is not None:
                print(f"end reason = {choice.finish_reason}")
                break
            page += content
            yield content
147
-
148
-
149
def images_to_pdf(img_paths, pdf_path):
    """Bundle one or more image files into a single multi-page PDF.

    Accepts a single path or a list of paths; raises ValueError on an empty
    list and FileNotFoundError for a missing source. Returns the Path written.
    """
    if isinstance(img_paths, (str, Path)):
        img_paths = [img_paths]
    if not img_paths:
        raise ValueError("img_paths is empty")

    pages = []
    for raw in img_paths:
        source = Path(raw)
        if not source.is_file():
            raise FileNotFoundError(source)
        loaded = Image.open(source)
        # Pillow cannot write RGBA/palette images straight to PDF.
        if loaded.mode in ("RGBA", "P"):
            loaded = loaded.convert("RGB")
        pages.append(loaded)

    target = Path(pdf_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    first, *rest = pages
    first.save(target,
               save_all=True,
               append_images=rest,
               resolution=300.0)
    return target
174
-
175
-
176
def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded ASCII string."""
    raw = Path(image_path).read_bytes()
    return base64.b64encode(raw).decode("utf-8")
179
-
180
def build_message(image_path, prompt):
    """Build an OpenAI-style chat payload pairing one image with a text prompt.

    The image is embedded inline as a base64 data URI via encode_image().
    """
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
        },
    }
    text_part = {"type": "text", "text": prompt}

    return [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": [image_part, text_part]},
    ]
200
-
201
-
202
-
203
def download_markdown_file(md_text):
    """Persist *md_text* under downloads/ with a unique name; return the path."""
    target = Path("downloads") / f"markdown_{uuid.uuid4().hex[:8]}.md"
    target.parent.mkdir(exist_ok=True)
    target.write_text(md_text, encoding="utf-8")
    return str(target)
210
-
211
-
212
async def doc_parser(doc_path, prompt, model_id):
    """Stream a Markdown rendition of *doc_path* parsed by the chosen model.

    PDFs are rasterized page-by-page (300 dpi); images are sent directly.
    Yields (markdown_so_far, raw_so_far) tuples while streaming, and finally
    one tuple joining all pages.

    Raises:
        FileNotFoundError: when doc_path does not exist.
    """
    model_name = AVAILABLE_MODELS[model_id]["name"]
    client = AVAILABLE_MODELS[model_id]["client"]
    Authorization = AVAILABLE_MODELS[model_id]["Authorization"]

    doc_path = Path(doc_path)
    if not doc_path.is_file():
        raise FileNotFoundError(doc_path)

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)

        queries = []
        if doc_path.suffix.lower() == ".pdf":
            # BUG FIX: the original annotated this as `pages: List[Image.Image]`
            # but `List` was never imported, raising NameError at runtime for
            # every PDF input. The annotation is dropped.
            pages = convert_from_path(doc_path, dpi=300)
            for idx, page in enumerate(pages, start=1):
                img_path = tmpdir / f"page_{idx}.png"
                page.save(img_path, "PNG")
                queries.append(build_message(img_path, prompt))
        else:
            queries.append(build_message(doc_path, prompt))

        all_pages = []
        all_pages_raw = []
        for query in queries:
            pages = ""
            async for chunk in request(query, model_name, client, Authorization):
                pages += chunk
                yield extract_makrdown(pages), pages
            all_pages.append(extract_makrdown(pages))
            all_pages_raw.append(pages)
            # (debug `print(all_pages)` removed — it spammed full documents to stdout)
        yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
249
-
250
-
251
def compress_directory_to_zip(directory_path, output_zip_path):
    """Zip *directory_path* recursively (paths stored relative to its root).

    Returns 0 on success, -1 on failure (the exception is logged, not raised).
    """
    try:
        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, filenames in os.walk(directory_path):
                for name in filenames:
                    full = os.path.join(root, name)
                    zipf.write(full, os.path.relpath(full, directory_path))
        return 0
    except Exception as exc:
        logger.exception(exc)
        return -1
268
-
269
# Delimiter pairs Gradio's Markdown component uses to detect LaTeX math
# ($$ / \[ \] are display-mode; $ / \( \) are inline).
latex_delimiters = [
    {'left': '$$', 'right': '$$', 'display': True},
    {'left': '$', 'right': '$', 'display': False},
    {'left': '\\(', 'right': '\\)', 'display': False},
    {'left': '\\[', 'right': '\\]', 'display': True},
]
275
-
276
def check_prompt(prompt):
    """Validate that a non-blank prompt was supplied; return it unchanged."""
    if not (prompt and prompt.strip()):
        raise gr.Error("Please select or enter a prompt before parsing.")
    return prompt
280
-
281
def to_file(image_path):
    """Map the Academic_Papers preview image onto its PDF counterpart.

    All other paths pass through unchanged.
    """
    is_academic_png = image_path.endswith("Academic_Papers.png")
    if not is_academic_png:
        return image_path
    return image_path.replace("Academic_Papers.png", "Academic_Papers.pdf")
287
-
288
def render_img(b64_list, idx, scale):
    """Render the image at index *idx*, scaled by *scale*, as an HTML fragment.

    idx is wrapped with modulo so any integer (including negatives) is valid.
    scale <= 1 centers the image inside an 800px viewport; scale > 1 produces
    a scrollable, overflowing view. Returns a raw HTML string for gr.HTML.
    """
    if not b64_list:
        # Chinese placeholder text ("please upload an image first") — runtime
        # output, intentionally left untranslated.
        return "<p style='color:gray'>请先上传图片</p>"
    idx %= len(b64_list)
    src = b64_list[idx]

    # Express zoom as a CSS width percentage; height follows proportionally.
    percent = scale * 100

    if scale <= 1:
        # ---------- centered mode (image no wider than the viewport) ----------
        return f"""
        <div style="
            width:100%;
            height:800px;
            overflow:auto;
            border:1px solid #ccc;
        ">
            <div style="
                min-width:100%; /* 保证外层 div 至少跟容器一样宽 */
                display:flex;
                justify-content:center; /* 小图水平居中 */
            ">
                <img src="{src}" style="
                    width:{percent}%;
                    height:auto;
                    display:block;
                ">
            </div>
        </div>
        """
    else:
        # ---------- zoomed-in mode (scrollable overflow) ----------
        return (
            f'<div style="overflow:auto;border:1px solid #ccc;'
            f'width:100%;height:800px;">'
            f' <img src="{src}" '
            f' style="width:{percent}%;max-width:none;'
            f' height:auto;display:block;" />'
            f'</div>'
        )
339
-
340
def files_to_b64(file, pdf_dpi: int = 200):
    """Convert an upload (path string, or object exposing .data/.name) into a
    list of base64 data-URI strings — one per PDF page, or one for an image.
    """
    out: list[str] = []

    def _page_to_uri(page):
        # Rasterized PDF pages become PNG data URIs.
        buf = BytesIO()
        page.save(buf, format="PNG")
        return f"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}"

    if hasattr(file, "data"):
        # In-memory upload: bytes are already available on the object.
        raw_bytes = file.data
        suffix = pathlib.Path(file.name).suffix.lower()
        if suffix == ".pdf":
            out.extend(_page_to_uri(p) for p in convert_from_bytes(raw_bytes, dpi=pdf_dpi))
        else:
            out.append(f"data:image/{suffix[1:]};base64,{base64.b64encode(raw_bytes).decode()}")
    else:
        # Filesystem upload: read from disk.
        path = pathlib.Path(file)
        suffix = path.suffix.lower()
        if suffix == ".pdf":
            out.extend(_page_to_uri(p) for p in convert_from_path(str(path), dpi=pdf_dpi))
        else:
            out.append(f"data:image/{suffix[1:]};base64,{base64.b64encode(path.read_bytes()).decode()}")

    return out
375
-
376
-
377
async def process_file(file_path):
    """Ensure the upload is a PDF (wrapping images), then upload it in the background.

    Returns the PDF path as a string, or None when nothing was uploaded.
    """
    if file_path is None:
        return None

    if file_path.endswith(".pdf"):
        pdf_path = file_path
    else:
        # Non-PDF uploads are converted into a sibling .pdf first.
        pdf_path = Path(file_path).with_suffix(".pdf")
        images_to_pdf(file_path, pdf_path)

    # Fire-and-forget: the upload runs on the current event loop.
    asyncio.create_task(send_pdf_async_aiohttp(pdf_path, server_ip=openai_api_base, Authorization=Authorization))

    return str(pdf_path)
391
-
392
-
393
def check_file(f):
    """Reject a missing upload with a UI error; pass a present one through."""
    if f is not None:
        return f
    raise gr.Error("Please upload a PDF or image before parsing.")
397
-
398
-
399
if __name__ == '__main__':
    # Two-column Gradio demo: left = upload/preview, right = model + results.
    with gr.Blocks() as demo:
        with gr.Row():
            # ----- left panel: upload, prompt, paging/zoom controls, preview -----
            with gr.Column(variant='panel', scale=5):

                file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
                prompts = gr.Dropdown(
                    choices=preset_prompts,
                    label="Prompt",
                    info="Enter or select prompts...",
                    value=preset_prompts[0],
                    multiselect=False,
                    interactive=True,
                    allow_custom_value=True,
                )

                with gr.Row():
                    change_bu = gr.Button('Parse')
                    clear_bu = gr.ClearButton(value='Clear')

                zoom = gr.Slider(0.5, 3, value=1, step=0.1, label="Image Scale")
                with gr.Row():
                    prev_btn = gr.Button("⬅️ Pre")
                    next_btn = gr.Button("Next ➡️")

                viewer = gr.HTML()  # page preview produced by render_img()

                # NOTE(review): `images` is built here but never referenced
                # again below — looks like dead code; confirm before removing.
                example_root = os.path.join(os.path.dirname(__file__), 'examples')
                images = [
                    os.path.join(example_root, f)
                    for f in os.listdir(example_root)
                    if f.lower().endswith(('png', 'jpg', 'jpeg'))
                ]

            # ----- right panel: model selector, examples, download, results -----
            with gr.Column(variant='panel', scale=5):

                model_selector = gr.Dropdown(
                    choices=[(k, k) for k, v in AVAILABLE_MODELS.items()],
                    value=list(AVAILABLE_MODELS.keys())[0],  # default: first model
                    label="Model Selection",
                    info="Select the model to use for parsing",
                    interactive=True,
                )

                with gr.Accordion("Examples", open=True):
                    example_root = "examples"
                    file_path = [
                        os.path.join(example_root, f)
                        for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
                    ]

                    with gr.Row():
                        for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
                            with gr.Column(scale=1, min_width=120):
                                gr.Image(
                                    value=file_path[i],
                                    width=120,
                                    height=90,
                                    show_label=False,
                                    show_download_button=False
                                )
                                # Clicking an example loads it into the upload
                                # widget (to_file maps the PDF example's preview
                                # PNG to the real PDF).
                                gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)

                download_btn = gr.Button("⬇️ Generate download link", size="sm")
                output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box", visible=False)

                gr.HTML("""
                <style>
                #down-file-box {
                    max-height: 300px;
                }
                </style>
                """)
                with gr.Tabs():
                    with gr.Tab('Markdown rendering'):
                        md = gr.Markdown(label='Markdown rendering', height=1100, show_copy_button=True,
                                         latex_delimiters=latex_delimiters,
                                         line_breaks=True)
                    with gr.Tab('Markdown text'):
                        md_text = gr.TextArea(lines=45, show_copy_button=True)

        # Session state: pages as data-URIs, plus the currently shown index.
        img_list_state = gr.State([])
        idx_state = gr.State(0)

        async def upload_handler(files):
            """On upload: start a background PDF transfer and render page 0."""
            if files is None:
                return [], 0, ""

            if files.lower().endswith(".pdf"):
                # Fire-and-forget copy of the PDF to the parsing backend.
                asyncio.create_task(send_pdf_async_aiohttp(files, server_ip=openai_api_base, Authorization=Authorization))

            b64s = files_to_b64(files)
            return b64s, 0, render_img(b64s, 0, 1)

        file.change(
            upload_handler,
            inputs=file,
            outputs=[img_list_state, idx_state, viewer],
        ).then(
            lambda: gr.update(value=1),  # no inputs: reset zoom back to 1
            None,  # inputs=None
            zoom  # outputs=[zoom]
        )

        def show_prev(b64s, idx, scale):
            # render_img wraps the index with modulo, so going below 0 is safe.
            idx -= 1
            return idx, render_img(b64s, idx, scale)

        prev_btn.click(
            show_prev,
            inputs=[img_list_state, idx_state, zoom],
            outputs=[idx_state, viewer],
        )

        def show_next(b64s, idx, scale):
            idx += 1
            return idx, render_img(b64s, idx, scale)

        next_btn.click(
            show_next,
            inputs=[img_list_state, idx_state, zoom],
            outputs=[idx_state, viewer],
        )

        zoom.change(
            lambda b64s, idx, scale: render_img(b64s, idx, scale),
            inputs=[img_list_state, idx_state, zoom],
            outputs=viewer,
        )

        # Parse pipeline: validate prompt -> hide stale download -> validate
        # file -> stream doc_parser output into both result tabs.
        change_bu.click(
            fn=check_prompt,
            inputs=prompts,
            outputs=prompts
        ).then(
            lambda f: gr.update(visible=False),
            inputs=output_file,
            outputs=output_file
        ).then(
            fn=check_file,
            inputs=file,
            outputs=file
        ).then(
            fn=doc_parser,
            inputs=[file, prompts, model_selector],
            outputs=[md, md_text]
        )

        clear_bu.add([file, md, md_text])

        # Write the markdown to disk, then reveal the download widget.
        download_btn.click(
            fn=download_markdown_file,
            inputs=md_text,
            outputs=output_file
        ).then(
            lambda f: gr.update(visible=True),
            inputs=output_file,
            outputs=output_file
        )

    demo.launch(server_name='0.0.0.0', share=True)
 
 
1
  import os
 
2
  import sys
 
 
 
 
 
 
3
  import subprocess
4
+ from loguru import logger
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def setup_and_run():
    """Clone (or update) the INF-MLLM repo, then launch its Gradio demo.

    Side effects: runs git, mutates os.environ (Gradio server config), and
    blocks on the child Gradio process until it exits. A non-zero exit of the
    child is logged rather than propagated.
    """
    repo_url = "https://github.com/infly-ai/INF-MLLM.git"
    clone_dir = "INF-MLLM"
    repo_dir = "INF-MLLM/Infinity-Parser2/infinity_parser2"

    if not os.path.exists(clone_dir):
        logger.info(f"clone: {repo_url}")
        subprocess.run(["git", "clone", repo_url], check=True)
    else:
        logger.info("pulling...")
        subprocess.run(["git", "pull"], cwd=clone_dir, check=True)

    # Configure the child Gradio server through environment variables.
    os.environ['GRADIO_SSR_MODE'] = "false"
    os.environ["GRADIO_SERVER_PORT"] = "7860"
    os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"

    # BUG FIX: the log message previously said "run inf_gradio.py ..." while
    # the command actually executes gradio_app.py — made the message match.
    logger.info("run gradio_app.py ...")
    try:
        subprocess.run([sys.executable, "gradio_app.py"], cwd=repo_dir, check=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Gradio exit: {e}")


if __name__ == "__main__":
    setup_and_run()