import gradio as gr
import pandas as pd
import tensorflow as tf
from keras.models import load_model
import numpy as np
import PIL
from PIL import Image, ImageOps
import cv2
import pytesseract
import os
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import requests

langs = []

# Languages reported by the local Tesseract install. The first line of the
# command output is skipped (as the original slicing did); blank lines are
# ignored so a missing trailing newline cannot drop the last language.
# The pipe is closed deterministically by the context manager.
with os.popen('tesseract --list-langs') as _tesseract_proc:
    choices = [
        line.strip()
        for line in _tesseract_proc.read().splitlines()[1:]
        if line.strip()
    ]

# Load the TrOCR model and processor once at import time so every request
# reuses the same instances.
_TROCR_CHECKPOINT = 'microsoft/trocr-large-handwritten'
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)


def trocr_predict(image):
    """Transcribe the handwriting in *image* with the TrOCR model.

    Args:
        image: The image to read. PIL images are normalised to RGB first;
            any other input type is handed to the processor unchanged.

    Returns:
        The decoded text of the first generated sequence.
    """
    # The TrOCR usage examples feed RGB images; converting here keeps
    # grayscale / RGBA PIL inputs from reaching the processor with an
    # unexpected channel layout.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    pixel_values = trocr_processor(images=image, return_tensors="pt").pixel_values
    generated_ids = trocr_model.generate(pixel_values)
    return trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def run(image, lang=None):
    """Extract text from *image* with Tesseract.

    Args:
        image: Image passed straight to ``pytesseract.image_to_string``.
        lang: Tesseract language code string (e.g. ``"eng"`` or
            ``"eng+fra"``), a list/tuple of codes, or ``None``/empty to let
            Tesseract use its default language.

    Returns:
        The recognised text.
    """
    # pytesseract expects a single "+"-joined string, not a sequence.
    if isinstance(lang, (list, tuple)):
        lang = "+".join(lang)
    # An empty selection means "no explicit language".
    return pytesseract.image_to_string(image, lang=lang or None)


# Loading different models
mnist_cnn_model = load_model('./models/mnist_cnn_model.h5')
mnist_mcdnn_model = load_model('./models/mnist_mcdnn_model.h5')


def _predict_digit(model, image):
    """Classify a single digit image with one of the Keras MNIST models."""
    gray = image.convert("L").resize((28, 28))
    scaled = np.array(gray) / 255.0
    # Add the batch dimension -> shape (1, 28, 28).
    # NOTE(review): no channel axis is added; confirm the saved models
    # accept this shape.
    batch = np.expand_dims(scaled, axis=0)
    scores = model.predict(batch)
    return str(np.argmax(scores))


# Data Preprocessing
def pre_process(image, model_name):
    """Run the model selected in the UI on *image* and return its output.

    Args:
        image: PIL image from the upload widget, or ``None`` when nothing
            has been uploaded.
        model_name: One of ``"Simple CNN"``, ``"MCDNN"``, ``"OCR"`` or
            ``"CNN BI-LSTM"``.

    Returns:
        The predicted digit (as a string) for the two MNIST models, the
        recognised text for the OCR options, or an explanatory message when
        the image is missing or the model name is not recognised.
    """
    # Without this guard an empty upload crashes on ``image.convert``.
    if image is None:
        return "Please upload an image"

    if model_name == "Simple CNN":
        return _predict_digit(mnist_cnn_model, image)
    if model_name == "MCDNN":
        return _predict_digit(mnist_mcdnn_model, image)
    if model_name == "OCR":
        return run(image)
    if model_name == "CNN BI-LSTM":
        # This menu label is served by the TrOCR model.
        return trocr_predict(image)
    return "Invalid model selection"


# Defining model options for dropdown
model_options = ["Simple CNN", "MCDNN", "OCR", "CNN BI-LSTM"]  # names must match those handled by pre_process

# Page layout: title, image upload + model picker, predict button, result.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Label("ScriptSense", container=False)
    with gr.Row():
        with gr.Column():
            # gr.Image / gr.Dropdown are the top-level component classes;
            # the older gr.inputs namespace is deprecated.
            image_file = gr.Image(type="pil", label="Upload an image")
            model_select = gr.Dropdown(choices=model_options, label="Select Model")
    with gr.Row():
        predict_btn = gr.Button(value="Predict")
    with gr.Row():
        Answer = gr.Label("👋 Hello, Let us predict the Result 💁‍♂️", container=False)

    # Clicking the button sends the uploaded image and the chosen model name
    # to pre_process and shows its return value in the result label.
    predict_btn.click(
        pre_process,
        inputs=[image_file, model_select],
        outputs=[Answer],
    )

demo.launch()