import streamlit as st
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration
from PIL import Image
import torch
import os


@st.cache_resource
def load_model():
    """Load the PaliGemma2 processor and model once per server process.

    ``st.cache_resource`` keeps the model in memory across Streamlit
    reruns; without it the weights are re-instantiated on every widget
    interaction (button click, file upload), which is prohibitively slow.

    Returns:
        (processor, model) tuple ready for inference.
    """
    # NOTE(review): "google/paligemma2" may need a concrete variant id
    # (e.g. "google/paligemma2-3b-pt-224") — confirm against the HF Hub.
    processor = PaliGemmaProcessor.from_pretrained("google/paligemma2")
    model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma2")
    model.eval()  # inference only — disable dropout etc.
    return processor, model


def process_image(image, processor, model):
    """Extract text from *image* using PaliGemma2.

    Args:
        image: PIL image containing text (any mode; converted to RGB).
        processor: the ``PaliGemmaProcessor`` from ``load_model``.
        model: the PaliGemma model from ``load_model``.

    Returns:
        The decoded text string produced by the model.
    """
    # PaliGemma is a prompted VLM: the processor requires a text prompt
    # alongside the image. "ocr" asks the model to read the visible text.
    prompt = "ocr"
    # Uploads may be RGBA (PNG) or grayscale — normalize to 3-channel RGB.
    inputs = processor(text=prompt, images=image.convert("RGB"), return_tensors="pt")
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=256)

    # Decode only the newly generated tokens; the prompt is echoed at the
    # start of ``generated_ids`` and must not appear in the output.
    new_tokens = generated_ids[:, prompt_len:]
    return processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()


def main():
    """Streamlit entry point: upload an image and extract its text."""
    st.set_page_config(page_title="Text Reading with PaliGemma2", layout="centered")
    st.title("Text Reading from Images using PaliGemma2")

    # Load model and processor (cached across reruns by st.cache_resource).
    with st.spinner("Loading PaliGemma2 model... This may take a few moments."):
        processor, model = load_model()
    st.success("Model loaded successfully!")

    # User input: upload image
    uploaded_image = st.file_uploader(
        "Upload an image containing text", type=["png", "jpg", "jpeg"]
    )

    if uploaded_image is not None:
        image = Image.open(uploaded_image)
        st.image(image, caption="Uploaded Image", use_column_width=True)

        if st.button("Extract Text"):
            with st.spinner("Processing image..."):
                extracted_text = process_image(image, processor, model)
            st.success("Text extraction complete!")
            st.subheader("Extracted Text")
            st.write(extracted_text)

    # Footer
    st.markdown("---")
    st.markdown("**Built with [PaliGemma2](https://huggingface.co/google/paligemma2) and Streamlit**")


if __name__ == "__main__":
    main()