# Spaces: Gizachew / amst
# Build error
# File size: 1,784 Bytes

# app.py

import gradio as gr
import torch
from PIL import Image
from model import load_model
from utils import preprocess_image, decode_predictions
import os

# Asset file names; relative paths, so they resolve against the current
# working directory of the process.
MODEL_PATH = "finetuned_recog_model.pth"
FONT_PATH = "NotoSansEthiopic-Regular.ttf"  # Font asset; only its presence is checked here

# Fail fast at import time when a required asset is missing. The model is
# checked before the font, so a missing model is always reported first.
for _kind, _asset in (("Model", MODEL_PATH), ("Font", FONT_PATH)):
    if os.path.exists(_asset):
        continue
    raise FileNotFoundError(f"{_kind} file not found at {_asset}. Please provide the correct path.")
del _kind, _asset  # keep the loop variables out of the module namespace

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# `load_model` comes from the project's `model` module; it receives the
# checkpoint path and the target device.
model = load_model(MODEL_PATH, device=device)

def recognize_text(image: Image.Image) -> str:
    """Run the recognition model on a single image and return its text.

    Args:
        image: The image to read, or ``None`` when nothing was supplied.

    Returns:
        The first entry of what ``decode_predictions`` yields for the model
        output, or the literal message ``"No image provided."`` when
        ``image`` is ``None``.
    """
    if image is not None:
        # Add a leading batch axis and move the tensor to the model's device.
        # NOTE(review): the original author annotated the result as
        # [1, 3, 224, 224]; that depends on `preprocess_image` — confirm.
        batch = preprocess_image(image).unsqueeze(0).to(device)

        # Inference only, so gradient tracking is switched off.
        # NOTE(review): the original author annotated the output as
        # [H*W, 1, vocab_size]; that depends on the model — confirm.
        with torch.no_grad():
            model_output = model(batch)

        # A single image was submitted, so only the first decoded entry is used.
        return decode_predictions(model_output)[0]

    return "No image provided."

# Widgets for the demo page: one image upload (handed to the callback as a
# PIL image) and one text box for the result.
_image_input = gr.Image(type="pil", label="Upload Image")
_text_output = gr.Textbox(label="Recognized Amharic Text")

# Wire `recognize_text` to the widgets above.
iface = gr.Interface(
    fn=recognize_text,
    inputs=_image_input,
    outputs=_text_output,
    title="Amharic Text Recognition",
    description=(
        "Upload an image containing Amharic text, "
        "and the model will recognize and display the text."
    ),
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()