import os
import tempfile

import cv2
import gradio as gr
import pytesseract
import torch
from transformers import AutoTokenizer, AutoModel

# Path to the Tesseract binary (only needed if pytesseract is used; the GOT model below does not require it)
pytesseract.pytesseract.tesseract_cmd = r'/opt/homebrew/bin/tesseract'  # Update this if necessary

# Load the GOT-OCR2.0 tokenizer and model
tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).eval()


def perform_ocr(image, language):
    # Save the NumPy array as an image file temporarily.
    # Gradio supplies RGB arrays, while OpenCV writes BGR, so convert before saving.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
        temp_filename = temp_file.name
        cv2.imwrite(temp_filename, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

    # Perform OCR with the GOT model (plain-text mode)
    res_eng = model_eng.chat(tokenizer_eng, temp_filename, ocr_type='ocr')

    # Clean up the temporary file
    os.remove(temp_filename)

    return res_eng  # Return results for English


def ocr_and_search(image, language):
    # Call the perform_ocr function (the language selection is not used yet; only the English path is implemented)
    english_text = perform_ocr(image, language)
    return english_text  # Return the OCR result


# Create Gradio interface
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Dropdown(choices=["English", "Hindi"], label="Select Language"),
    ],
    outputs=gr.Textbox(label="Extracted Text"),
    title="OCR Application",
    description="Upload an image to extract text using OCR.",
)

# Run the app
if __name__ == "__main__":
    iface.launch()