Spaces:

Jiangxz
/

Math_OCR

Running

App Files Files

Math_OCR / app.py

Jiangxz

Upload 2 files

e4ca6c2 verified 9 months ago

raw

history blame

4.42 kB

	import os
	os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
	import io
	import pandas as pd
	import streamlit as st
	from streamlit_drawable_canvas import st_canvas
	import hashlib
	import pypdfium2
	from texify.inference import batch_inference
	from texify.model.model import load_model
	from texify.model.processor import load_processor
	from texify.output import replace_katex_invalid
	from PIL import Image

	# Upper bounds, in pixels, that resize_image() passes to Image.thumbnail();
	# also the fallback (height, width) reported by get_image_size() when it is
	# given None instead of an image.
	MAX_WIDTH = 800
	MAX_HEIGHT = 1000

	@st.cache_resource()
	def load_model_cached():
	    """Return the texify model, loaded through Streamlit's resource cache.

	    Wrapping ``load_model`` in ``st.cache_resource`` lets script reruns
	    reuse the already-loaded model instead of loading it again.
	    """
	    texify_model = load_model()
	    return texify_model
	@st.cache_resource()
	def load_processor_cached():
	    """Return the texify processor, loaded through Streamlit's resource cache.

	    Wrapping ``load_processor`` in ``st.cache_resource`` lets script reruns
	    reuse the already-loaded processor instead of loading it again.
	    """
	    texify_processor = load_processor()
	    return texify_processor
	@st.cache_data()
	def infer_image(pil_image, bbox, temperature):
	    """Run texify OCR on one rectangular region of an image.

	    Args:
	        pil_image: Image to crop from (anything exposing ``crop``).
	        bbox: ``(left, top, right, bottom)`` box handed to ``pil_image.crop``.
	        temperature: Sampling temperature forwarded to ``batch_inference``.

	    Returns:
	        The first (and only) entry of the batch-inference output.

	    Note:
	        Reads the module-level ``model`` and ``processor`` globals, which the
	        script assigns before any call reaches this function.
	    """
	    region = pil_image.crop(bbox)
	    batch_results = batch_inference(
	        [region],
	        model,
	        processor,
	        temperature=temperature,
	    )
	    return batch_results[0]

	def open_pdf(pdf_file):
	    """Open an uploaded PDF as a ``pypdfium2.PdfDocument``.

	    Args:
	        pdf_file: Object exposing ``getvalue()`` that returns the PDF bytes.

	    Returns:
	        A ``pypdfium2.PdfDocument`` backed by an in-memory byte stream.
	    """
	    raw_bytes = pdf_file.getvalue()
	    return pypdfium2.PdfDocument(io.BytesIO(raw_bytes))

	@st.cache_data()
	def get_page_image(pdf_file, page_num, dpi=96):
	    """Render a single PDF page to an RGB PIL image.

	    Args:
	        pdf_file: Uploaded PDF, passed straight to ``open_pdf``.
	        page_num: 1-based page number; converted to a 0-based index here.
	        dpi: Target resolution; ``dpi / 72`` is passed as the render scale.

	    Returns:
	        The rendered page converted to ``"RGB"`` mode.
	    """
	    document = open_pdf(pdf_file)
	    zero_based_index = page_num - 1
	    rendered_pages = document.render(
	        pypdfium2.PdfBitmap.to_pil,
	        page_indices=[zero_based_index],
	        scale=dpi / 72,
	    )
	    # Only one page index was requested, so the first item is the page.
	    first_page = list(rendered_pages)[0]
	    return first_page.convert("RGB")

	@st.cache_data()
	def get_uploaded_image(in_file):
	    """Decode an uploaded image file and return it in RGB mode.

	    Args:
	        in_file: File-like object accepted by ``PIL.Image.open``.

	    Returns:
	        The opened image converted to ``"RGB"``.
	    """
	    opened = Image.open(in_file)
	    return opened.convert("RGB")

	def resize_image(pil_image):
	    """Shrink ``pil_image`` in place to fit ``MAX_WIDTH`` x ``MAX_HEIGHT``.

	    Does nothing when ``pil_image`` is ``None``. The image is modified via
	    ``Image.thumbnail`` with LANCZOS resampling; nothing is returned.
	    """
	    if pil_image is not None:
	        bounds = (MAX_WIDTH, MAX_HEIGHT)
	        pil_image.thumbnail(bounds, Image.Resampling.LANCZOS)

	@st.cache_data()
	def page_count(pdf_file):
	    """Return the number of pages in the uploaded PDF.

	    Args:
	        pdf_file: Uploaded PDF, passed straight to ``open_pdf``.

	    Returns:
	        ``len()`` of the opened ``PdfDocument``.
	    """
	    return len(open_pdf(pdf_file))

	def get_canvas_hash(pil_image):
	    """Return the MD5 hex digest of the image's raw bytes.

	    The script uses this digest as the canvas widget ``key``.

	    Args:
	        pil_image: Object exposing ``tobytes()``.

	    Returns:
	        A 32-character hexadecimal string.
	    """
	    digest = hashlib.md5()
	    digest.update(pil_image.tobytes())
	    return digest.hexdigest()

	@st.cache_data()
	def get_image_size(pil_image):
	    """Return ``(height, width)`` for the canvas.

	    Args:
	        pil_image: Image exposing ``height`` and ``width``, or ``None``.

	    Returns:
	        The image's own ``(height, width)``, or ``(MAX_HEIGHT, MAX_WIDTH)``
	        when no image is given. Note the height-first ordering.
	    """
	    if pil_image is not None:
	        return pil_image.height, pil_image.width
	    return MAX_HEIGHT, MAX_WIDTH

	# Configure the Streamlit page to use the wide layout.
	st.set_page_config(layout="wide")

	# Markdown heading plus usage instructions (Traditional Chinese) rendered
	# at the top of the page.
	top_message = """### LaTeX：Math OCR
	上傳圖片或 PDF 檔案後，請通過拖曳畫一個框圈選你想進行 OCR 的方程式，拖曳框圈範圍以框選數學公式範圍即可，框好後即直接開始辨識轉換為 LaTeX 格式，最終辨識結果會顯示在右側邊欄。
	"""

	st.markdown(top_message)
	# Two columns in a 70/30 split: col1 hosts the drawing canvas, col2 the
	# OCR results.
	col1, col2 = st.columns([.7, .3])

	# Module-level globals read by infer_image(); both loaders are wrapped in
	# st.cache_resource.
	model = load_model_cached()
	processor = load_processor_cached()

	# Sidebar uploader; the rest of the script needs a file to work with.
	in_file = st.sidebar.file_uploader("上傳圖片或 PDF 檔案：", type=["pdf", "png", "jpg", "jpeg", "gif", "webp"])
	if in_file is None:
	    # Nothing uploaded yet: end this script run here.
	    st.stop()

	filetype = in_file.type
	# Set to True only when the "OCR whole image" button is pressed (images only).
	whole_image = False
	if "pdf" in filetype:
	    # Store the count under its own name. Assigning it to ``page_count``
	    # would rebind the page_count() function to an int, so any later call
	    # in the same run would raise TypeError.
	    num_pages = page_count(in_file)
	    page_number = st.sidebar.number_input(
	        f"Page number out of {num_pages}:",
	        min_value=1,
	        value=1,
	        max_value=num_pages,
	    )

	    pil_image = get_page_image(in_file, page_number)
	else:
	    pil_image = get_uploaded_image(in_file)
	    whole_image = st.sidebar.button("OCR 圖片")

	# Shrinks the image in place so it fits the canvas bounds.
	resize_image(pil_image)

	temperature = st.sidebar.slider("Temperature：", min_value=0.0, max_value=1.0, value=0.0, step=0.05)

	# Key the canvas on the image content; use an explicit None check rather
	# than relying on the truthiness of an image object.
	canvas_hash = get_canvas_hash(pil_image) if pil_image is not None else "canvas"

	# get_image_size() returns (height, width); unpack once for the canvas.
	canvas_height, canvas_width = get_image_size(pil_image)

	# Drawing canvas in the left column: the user drags rectangles over the
	# page image to mark the regions to OCR.
	with col1:
	    canvas_result = st_canvas(
	        fill_color="rgba(255, 165, 0, 0.1)",
	        stroke_width=1,
	        stroke_color="#FFAA00",
	        background_color="#FFF",
	        background_image=pil_image,
	        update_streamlit=True,
	        height=canvas_height,
	        width=canvas_width,
	        drawing_mode="rect",
	        point_display_radius=0,
	        key=canvas_hash,
	    )

	# Collect the boxes to OCR, either from rectangles drawn on the canvas or
	# from the whole image, then show each result in the right column.
	json_data = canvas_result.json_data
	if json_data is not None or whole_image:
	    bbox_list = None
	    # json_data can be None while whole_image is True; only read the drawn
	    # objects when the canvas actually returned data, otherwise indexing
	    # None would raise TypeError.
	    if json_data is not None:
	        objects = pd.json_normalize(json_data["objects"])
	        if objects.shape[0] > 0:
	            # .copy() so the new columns are added to an independent frame
	            # rather than to a slice of ``objects``.
	            boxes = objects[objects["type"] == "rect"][["left", "top", "width", "height"]].copy()
	            boxes["right"] = boxes["left"] + boxes["width"]
	            boxes["bottom"] = boxes["top"] + boxes["height"]
	            bbox_list = boxes[["left", "top", "right", "bottom"]].values.tolist()
	    if whole_image:
	        # The whole-image button overrides any drawn rectangles.
	        bbox_list = [(0, 0, pil_image.width, pil_image.height)]

	    if bbox_list:
	        with col2:
	            inferences = [infer_image(pil_image, bbox, temperature) for bbox in bbox_list]
	            # Newest box first, numbered by its original drawing order.
	            for idx, inference in enumerate(reversed(inferences)):
	                st.markdown(f"### {len(inferences) - idx}")
	                katex_markdown = replace_katex_invalid(inference)
	                st.markdown(katex_markdown)
	                st.code(inference)
	                st.divider()