Spaces:

hruday96
/

SnapText

Running

App Files Files Community

hruday96 committed on 2 days ago

Commit

a0d0642

•

1 Parent(s): 42e6cab

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -8

app.py CHANGED Viewed

@@ -1,20 +1,29 @@
 import streamlit as st  # Don't forget to include `streamlit` in your `requirements.txt` file to ensure the app runs properly on Hugging Face Spaces.
-from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration  # Make sure that the Hugging Face `transformers` library version supports the `PaliGemma2` model. You may need to specify the version in `requirements.txt`.
 from PIL import Image  # Ensure the `pillow` library is included in your `requirements.txt`.
 import torch  # Since PyTorch is required for this app, specify the appropriate version of `torch` in `requirements.txt` based on compatibility with the model.
 import os
 def load_model():
     """Load PaliGemma2 model and processor with Hugging Face token."""
     token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Retrieve token from environment variable
     if not token:
         raise ValueError("Hugging Face API token not found. Please set it in the environment variables.")
-    processor = PaliGemmaProcessor.from_pretrained("google/paligemma2", token=token)
-    model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma2", token=token)
     return processor, model
 def process_image(image, processor, model):
     """Extract text from image using PaliGemma2."""
     # Preprocess the image
     inputs = processor(images=image, return_tensors="pt")
@@ -29,7 +38,7 @@ def main():
     # Set page configuration
     st.set_page_config(page_title="Text Reading with PaliGemma2", layout="centered")
     st.title("Text Reading from Images using PaliGemma2")
     # Load model and processor
     with st.spinner("Loading PaliGemma2 model... This may take a few moments."):
         try:
@@ -38,10 +47,10 @@ def main():
         except ValueError as e:
             st.error(str(e))
             st.stop()
     # User input: upload image
     uploaded_image = st.file_uploader("Upload an image containing text", type=["png", "jpg", "jpeg"])
     if uploaded_image is not None:
         # Display uploaded image
         image = Image.open(uploaded_image)
@@ -54,10 +63,10 @@ def main():
                 st.success("Text extraction complete!")
                 st.subheader("Extracted Text")
                 st.write(extracted_text)
     # Footer
     st.markdown("---")
-    st.markdown("**Built with [PaliGemma2](https://huggingface.co/google/paligemma2) and Streamlit**")
 if __name__ == "__main__":
     main()

 import streamlit as st  # Don't forget to include `streamlit` in your `requirements.txt` file to ensure the app runs properly on Hugging Face Spaces.
+from transformers import AutoProcessor, AutoModelForImageTextToText  # Updated imports to reflect changes
 from PIL import Image  # Ensure the `pillow` library is included in your `requirements.txt`.
 import torch  # Since PyTorch is required for this app, specify the appropriate version of `torch` in `requirements.txt` based on compatibility with the model.
 import os
 def load_model():
     """Load the PaliGemma2 processor and model from the Hugging Face Hub.

     The access token is read from the ``HUGGINGFACEHUB_API_TOKEN``
     environment variable and forwarded to both ``from_pretrained`` calls.

     Returns:
         tuple: ``(processor, model)`` loaded from
         ``google/paligemma2-3b-pt-224``.

     Raises:
         ValueError: If the environment variable is unset or empty.
     """
     token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Retrieve token from environment variable
     if not token:
         raise ValueError("Hugging Face API token not found. Please set it in the environment variables.")
     # Single identifier shared by both loads so the processor and the model
     # always come from the same checkpoint.
     model_id = "google/paligemma2-3b-pt-224"
     # `token=` is the supported keyword; `use_auth_token=` is deprecated in
     # recent `transformers` releases.
     processor = AutoProcessor.from_pretrained(model_id, token=token)
     model = AutoModelForImageTextToText.from_pretrained(model_id, token=token)
     return processor, model
 def process_image(image, processor, model):
     """Extract text from image using PaliGemma2."""
     # Preprocess the image
     inputs = processor(images=image, return_tensors="pt")
     # Set page configuration
     st.set_page_config(page_title="Text Reading with PaliGemma2", layout="centered")
     st.title("Text Reading from Images using PaliGemma2")
     # Load model and processor
     with st.spinner("Loading PaliGemma2 model... This may take a few moments."):
         try:
         except ValueError as e:
             st.error(str(e))
             st.stop()
     # User input: upload image
     uploaded_image = st.file_uploader("Upload an image containing text", type=["png", "jpg", "jpeg"])
     if uploaded_image is not None:
         # Display uploaded image
         image = Image.open(uploaded_image)
                 st.success("Text extraction complete!")
                 st.subheader("Extracted Text")
                 st.write(extracted_text)
     # Footer
     st.markdown("---")
+    st.markdown("**Built with [PaliGemma2](https://huggingface.co/google/paligemma2-3b-pt-224) and Streamlit**")
 # Script entry point: call main() only when this file is executed directly,
 # not when it is imported as a module.
 if __name__ == "__main__":
     main()