hruday96 committed on
Commit
a0d0642
1 Parent(s): 42e6cab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -1,20 +1,29 @@
1
  import streamlit as st # Don't forget to include `streamlit` in your `requirements.txt` file to ensure the app runs properly on Hugging Face Spaces.
2
- from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration # Make sure that the Hugging Face `transformers` library version supports the `PaliGemma2` model. You may need to specify the version in `requirements.txt`.
 
3
  from PIL import Image # Ensure the `pillow` library is included in your `requirements.txt`.
 
4
  import torch # Since PyTorch is required for this app, specify the appropriate version of `torch` in `requirements.txt` based on compatibility with the model.
 
5
  import os
6
 
7
  def load_model():
8
  """Load PaliGemma2 model and processor with Hugging Face token."""
 
9
  token = os.getenv("HUGGINGFACEHUB_API_TOKEN") # Retrieve token from environment variable
 
10
  if not token:
11
  raise ValueError("Hugging Face API token not found. Please set it in the environment variables.")
12
- processor = PaliGemmaProcessor.from_pretrained("google/paligemma2", token=token)
13
- model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma2", token=token)
 
 
 
14
  return processor, model
15
 
16
  def process_image(image, processor, model):
17
  """Extract text from image using PaliGemma2."""
 
18
  # Preprocess the image
19
  inputs = processor(images=image, return_tensors="pt")
20
 
@@ -29,7 +38,7 @@ def main():
29
  # Set page configuration
30
  st.set_page_config(page_title="Text Reading with PaliGemma2", layout="centered")
31
  st.title("Text Reading from Images using PaliGemma2")
32
-
33
  # Load model and processor
34
  with st.spinner("Loading PaliGemma2 model... This may take a few moments."):
35
  try:
@@ -38,10 +47,10 @@ def main():
38
  except ValueError as e:
39
  st.error(str(e))
40
  st.stop()
41
-
42
  # User input: upload image
43
  uploaded_image = st.file_uploader("Upload an image containing text", type=["png", "jpg", "jpeg"])
44
-
45
  if uploaded_image is not None:
46
  # Display uploaded image
47
  image = Image.open(uploaded_image)
@@ -54,10 +63,10 @@ def main():
54
  st.success("Text extraction complete!")
55
  st.subheader("Extracted Text")
56
  st.write(extracted_text)
57
-
58
  # Footer
59
  st.markdown("---")
60
- st.markdown("**Built with [PaliGemma2](https://huggingface.co/google/paligemma2) and Streamlit**")
61
 
62
  if __name__ == "__main__":
63
  main()
 
1
  import streamlit as st # Don't forget to include `streamlit` in your `requirements.txt` file to ensure the app runs properly on Hugging Face Spaces.
2
+
3
+ from transformers import AutoProcessor, AutoModelForImageTextToText # Updated imports to reflect changes
4
  from PIL import Image # Ensure the `pillow` library is included in your `requirements.txt`.
5
+
6
  import torch # Since PyTorch is required for this app, specify the appropriate version of `torch` in `requirements.txt` based on compatibility with the model.
7
+
8
  import os
9
 
10
  def load_model():
11
  """Load PaliGemma2 model and processor with Hugging Face token."""
12
+
13
  token = os.getenv("HUGGINGFACEHUB_API_TOKEN") # Retrieve token from environment variable
14
+
15
  if not token:
16
  raise ValueError("Hugging Face API token not found. Please set it in the environment variables.")
17
+
18
+ # Load the processor and model using the correct identifier
19
+ processor = AutoProcessor.from_pretrained("google/paligemma2-3b-pt-224", use_auth_token=token)
20
+ model = AutoModelForImageTextToText.from_pretrained("google/paligemma2-3b-pt-224", use_auth_token=token)
21
+
22
  return processor, model
23
 
24
  def process_image(image, processor, model):
25
  """Extract text from image using PaliGemma2."""
26
+
27
  # Preprocess the image
28
  inputs = processor(images=image, return_tensors="pt")
29
 
 
38
  # Set page configuration
39
  st.set_page_config(page_title="Text Reading with PaliGemma2", layout="centered")
40
  st.title("Text Reading from Images using PaliGemma2")
41
+
42
  # Load model and processor
43
  with st.spinner("Loading PaliGemma2 model... This may take a few moments."):
44
  try:
 
47
  except ValueError as e:
48
  st.error(str(e))
49
  st.stop()
50
+
51
  # User input: upload image
52
  uploaded_image = st.file_uploader("Upload an image containing text", type=["png", "jpg", "jpeg"])
53
+
54
  if uploaded_image is not None:
55
  # Display uploaded image
56
  image = Image.open(uploaded_image)
 
63
  st.success("Text extraction complete!")
64
  st.subheader("Extracted Text")
65
  st.write(extracted_text)
66
+
67
  # Footer
68
  st.markdown("---")
69
+ st.markdown("**Built with [PaliGemma2](https://huggingface.co/google/paligemma2-3b-pt-224) and Streamlit**")
70
 
71
  if __name__ == "__main__":
72
  main()