import os

import gradio as gr
import numpy as np
import pytesseract
from keras.models import load_model
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Languages available to the local Tesseract install (not currently used by the UI)
langs = []
choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
# Load the TrOCR processor and model for handwritten text recognition
trocr_processor = TrOCRProcessor.from_pretrained('microsoft/trocr-large-handwritten')
trocr_model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-large-handwritten')


def trocr_predict(image):
    # Convert the PIL image into pixel values for TrOCR
    pixel_values = trocr_processor(images=image, return_tensors="pt").pixel_values
    # Generate token ids and decode them into text
    generated_ids = trocr_model.generate(pixel_values)
    decoded_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return decoded_text
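

# A quick standalone sanity check of the TrOCR path could look like the lines
# below ("sample.png" is a hypothetical local file, so this is left commented out
# rather than run at import time):
#
#   sample = Image.open("sample.png").convert("RGB")
#   print(trocr_predict(sample))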
def run(image, lang=None):
    # Plain Tesseract OCR; with lang=None Tesseract falls back to its default language
    result = pytesseract.image_to_string(image, lang=lang or None)
    return result
# Load the pre-trained MNIST digit classifiers
mnist_cnn_model = load_model('./models/mnist_cnn_model.h5')
mnist_mcdnn_model = load_model('./models/mnist_mcdnn_model.h5')
# Preprocess the uploaded image and dispatch it to the selected model
def pre_process(image, model_name):
    if model_name in ("Simple CNN", "MCDNN"):
        model = mnist_cnn_model if model_name == "Simple CNN" else mnist_mcdnn_model
        # MNIST-style preprocessing: grayscale, 28x28, scaled to [0, 1]
        img = image.convert("L").resize((28, 28))
        img = np.array(img) / 255.0
        final_img = np.expand_dims(img, axis=0)  # add batch dimension
        pred = model.predict(final_img)
        return str(np.argmax(pred))
    elif model_name == "OCR":
        # Tesseract OCR on the original image
        return run(image)
    elif model_name == "CNN BI-LSTM":
        # TrOCR handwritten-text recognition
        return trocr_predict(image)
    else:
        return "Invalid model selection"
# Model options for the dropdown
model_options = ["Simple CNN", "MCDNN", "OCR", "CNN BI-LSTM"]

with gr.Blocks() as demo:
    with gr.Row():
        gr.Label("ScriptSense", container=False)
    with gr.Row():
        with gr.Column():
            image_file = gr.Image(type="pil", label="Upload an image")
            model_select = gr.Dropdown(choices=model_options, label="Select Model")
    with gr.Row():
        predict_btn = gr.Button(value="Predict")
    with gr.Row():
        answer = gr.Label("πŸ‘‹ Hello, let's predict the result πŸ’β€β™‚οΈ", container=False)

    predict_btn.click(
        pre_process,
        inputs=[image_file, model_select],
        outputs=[answer],
    )

demo.launch()