import os

import gradio as gr
import numpy as np
import pytesseract
from keras.models import load_model
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Language codes available to the local Tesseract install; the first line of the
# `tesseract --list-langs` output is a header and the trailing entry is empty,
# so both are dropped. (Informational only; the UI below has no language selector.)
choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]


# Load the TrOCR handwriting-recognition model and its processor
# (fetched from the Hugging Face Hub and cached on first run)
trocr_processor = TrOCRProcessor.from_pretrained('microsoft/trocr-large-handwritten')
trocr_model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-large-handwritten')

def trocr_predict(image):
    # Prepare the image for TrOCR (the processor expects an RGB image)
    pixel_values = trocr_processor(images=image.convert("RGB"), return_tensors="pt").pixel_values

    # Generate token ids with the encoder-decoder model and decode them to text
    generated_ids = trocr_model.generate(pixel_values)
    decoded_text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return decoded_text
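
# Illustrative sketch only (not executed): trocr_predict can be tried outside
# the UI with any PIL image, e.g.
#   sample = PIL.Image.open("handwriting_sample.png")   # hypothetical file
#   print(trocr_predict(sample))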
    
def run(image, lang=None):
    # Plain Tesseract OCR. `lang` may be a single language code, a list of
    # codes, or None (Tesseract then uses its default language).
    if isinstance(lang, list):
        lang = '+'.join(lang) if lang else None
    result = pytesseract.image_to_string(image, lang=lang)
    return result
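
# Illustrative sketch only: the language codes collected in `choices` above can
# be passed straight through, e.g.
#   print(run(PIL.Image.open("scan.png"), lang="eng"))   # "scan.png" is a hypothetical file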

# Load the pre-trained MNIST digit-classification models
mnist_cnn_model = load_model('./models/mnist_cnn_model.h5')
mnist_mcdnn_model = load_model('./models/mnist_mcdnn_model.h5')
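
# Both checkpoints are assumed to be standard MNIST-style digit classifiers,
# i.e. 28x28 grayscale inputs scaled to [0, 1] with a 10-way softmax output;
# that is what the preprocessing in pre_process below relies on.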

# Dispatch the uploaded image to the selected model and return the prediction
def pre_process(image, model_name):
    # The two MNIST classifiers share the same preprocessing:
    # grayscale, 28x28, scaled to [0, 1], plus a batch dimension.
    if model_name in ("Simple CNN", "MCDNN"):
        model = mnist_cnn_model if model_name == "Simple CNN" else mnist_mcdnn_model
        img = image.convert("L").resize((28, 28))
        img = np.array(img) / 255.0
        final_img = np.expand_dims(img, axis=0)  # Add batch dimension
        pred = model.predict(final_img)
        result = np.argmax(pred)
        return str(result)
    elif model_name == "OCR":
        # Printed-text OCR with Tesseract on the original image
        return run(image)
    elif model_name == "CNN BI-LSTM":
        # Handwritten-text recognition with TrOCR
        return trocr_predict(image)
    else:
        return "Invalid model selection"

# Defining model options for dropdown
model_options = ["Simple CNN", "MCDNN", "OCR", "CNN BI-LSTM"]


with gr.Blocks() as demo:
    with gr.Row():
        gr.Label("ScriptSense", container=False)

    with gr.Row():
        with gr.Column():
            image_file = gr.Image(type="pil", label="Upload an image")
            model_select = gr.Dropdown(choices=model_options, label="Select Model")

    with gr.Row():
        predict_btn = gr.Button(value="Predict")

    with gr.Row():
        Answer = gr.Label("👋 Hello, let us predict the result 💁‍♂️", container=False)

    predict_btn.click(
        pre_process,
        inputs=[
            image_file, model_select
        ],
        outputs=[Answer],
    )

demo.launch()