"""Gradio app that extracts tables from an uploaded image into an Excel file.

The heavy lifting is done by PaddleOCR's PP-Structure pipeline; this module
only wires it to a small web UI.
"""

import os
import shutil
import uuid

import cv2
import gradio as gr
import numpy as np
from paddleocr import PPStructure, save_structure_res
from PIL import Image

# Root directory under which every request gets its own sub-folder.
OUTPUT_ROOT = './output'

# Base name handed to save_structure_res; it becomes a sub-folder name.
IMG_NAME = 'input_image'

# Initialize the PP-Structure table model once to avoid reloading it for
# each request.
table_engine = PPStructure(
    show_log=False,
    image_orientation=True,
)


def _find_excel_files(root):
    """Return every ``.xlsx`` file below *root*, sorted for determinism."""
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            if filename.lower().endswith('.xlsx'):
                found.append(os.path.join(dirpath, filename))
    return sorted(found)


def extract_table(image):
    """Run table recognition on *image* and return the path to an Excel file.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when the user
            submitted the form without choosing a file.

    Returns:
        Path of the generated ``.xlsx`` workbook. If several tables were
        recognized, the first workbook (in sorted path order) is returned.

    Raises:
        gr.Error: If no image was supplied or no table could be extracted.
            Gradio shows the message to the user instead of trying to serve
            it as a file.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    # A unique folder per request keeps concurrent sessions from clobbering
    # each other's output.
    session_id = str(uuid.uuid4())
    save_folder = os.path.join(OUTPUT_ROOT, session_id)
    os.makedirs(save_folder, exist_ok=True)

    # Normalize to 3-channel RGB first so the colour conversion below also
    # works for RGBA / greyscale / palette images, then switch to the BGR
    # channel order OpenCV-based code expects.
    image_cv = cv2.cvtColor(np.array(image.convert('RGB')), cv2.COLOR_RGB2BGR)

    # Perform layout/table recognition and let PaddleOCR write its results.
    result = table_engine(image_cv)
    save_structure_res(result, save_folder, IMG_NAME)

    # NOTE(review): save_structure_res appears to name each workbook after the
    # table's bounding box rather than after img_name (verify against the
    # installed PaddleOCR version), so search the session folder instead of
    # hard-coding a path.
    excel_files = _find_excel_files(save_folder)
    if not excel_files:
        # Nothing to hand back, so do not leave the empty session folder
        # (and intermediate result files) behind.
        shutil.rmtree(save_folder, ignore_errors=True)
        raise gr.Error("No table detected in the image.")

    return excel_files[0]


# Markdown shown under the title. Kept at column 0 so the list is not
# rendered as an indented code block.
_DESCRIPTION = """
**Extract Table Data from Images**

Upload an image of a financial statement (e.g., income statement, balance sheet) to extract the table data into an Excel file.

**Instructions:**
1. Click 'Browse' or drag and drop an image file.
2. Wait for the processing to complete.
3. Download the extracted Excel file.
"""

# Define the Gradio interface
iface = gr.Interface(
    fn=extract_table,
    inputs=gr.Image(type="pil"),
    outputs=gr.File(label="Extracted Excel File"),
    title="Table Data Extractor",
    description=_DESCRIPTION,
    allow_flagging="never",
    examples=[],
)

if __name__ == "__main__":
    iface.launch()