import os

import streamlit as st
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText


def load_model(model_id="google/paligemma2-3b-pt-224"):
    """Load the PaliGemma2 processor and model from the Hugging Face Hub.

    The access token is read from the ``HUGGINGFACEHUB_API_TOKEN`` environment
    variable, falling back to ``HF_TOKEN`` when the former is unset or empty.

    Args:
        model_id: Hub repository id of the checkpoint to load. Defaults to the
            PaliGemma2 3B pretrained 224px checkpoint.

    Returns:
        A ``(processor, model)`` tuple.

    Raises:
        ValueError: If no token is found in either environment variable.
    """
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
    if not token:
        raise ValueError("Hugging Face API token not found. Please set it in the environment variables.")

    # `token=` replaces the deprecated `use_auth_token=` keyword of from_pretrained.
    processor = AutoProcessor.from_pretrained(model_id, token=token)
    model = AutoModelForImageTextToText.from_pretrained(model_id, token=token)
    return processor, model


def process_image(image, processor, model, *, prompt="<image>ocr", max_new_tokens=256):
    """Extract text from an image using PaliGemma2.

    Args:
        image: The input image; it is converted to RGB when its ``mode`` is
            anything else (e.g. RGBA or palette PNG uploads).
        processor: Processor used to build the model inputs and decode output.
        model: Generative model exposing ``generate`` and ``device``.
        prompt: Task prompt passed to the processor alongside the image. The
            default asks for OCR; without any prompt the model only captions
            the picture instead of reading its text.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded generated text with the prompt tokens removed and
        surrounding whitespace stripped.
    """
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt")
    # Keep the inputs on the same device as the model weights.
    inputs = inputs.to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # An explicit budget is needed: the prompt is expanded with image
        # tokens, so a small default total-length limit would leave no room
        # for new tokens.
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt + continuation; decode only the continuation.
    new_token_ids = generated_ids[:, prompt_length:]
    text = processor.batch_decode(new_token_ids, skip_special_tokens=True)[0]
    return text.strip()


def main():
    """Run the Streamlit app: load the model, accept an image, show its text.

    The page lets the user upload a PNG/JPEG image, previews it, and on the
    "Extract Text" button runs ``process_image`` and displays the result.
    Model-loading and image-reading failures are shown as error messages and
    stop the current script run.
    """
    st.set_page_config(page_title="Text Reading with PaliGemma2", layout="centered")
    st.title("Text Reading from Images using PaliGemma2")

    # Streamlit re-executes the script on every widget interaction (upload,
    # button click). Caching the loader as a resource keeps a single loaded
    # model instead of reloading it on each rerun.
    cached_load_model = st.cache_resource(show_spinner=False)(load_model)

    with st.spinner("Loading PaliGemma2 model... This may take a few moments."):
        try:
            processor, model = cached_load_model()
        except (ValueError, OSError) as e:
            # ValueError: missing token; OSError: download/authentication
            # failures raised while fetching the checkpoint.
            st.error(str(e))
            st.stop()
        else:
            st.success("Model loaded successfully!")

    uploaded_image = st.file_uploader("Upload an image containing text", type=["png", "jpg", "jpeg"])

    if uploaded_image is not None:
        try:
            image = Image.open(uploaded_image)
            # Image.open is lazy; force decoding so corrupt files fail here.
            image.load()
        except OSError:
            st.error("Could not read the uploaded file as an image.")
            st.stop()

        # NOTE(review): `use_column_width` is reported as deprecated by recent
        # Streamlit releases - confirm the replacement for the pinned version.
        st.image(image, caption="Uploaded Image", use_column_width=True)

        if st.button("Extract Text"):
            with st.spinner("Processing image..."):
                extracted_text = process_image(image, processor, model)
            st.success("Text extraction complete!")
            st.subheader("Extracted Text")
            st.write(extracted_text)

    st.markdown("---")
    st.markdown("**Built with [PaliGemma2](https://huggingface.co/google/paligemma2-3b-pt-224) and Streamlit**")


# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|