ashok2216 committed on
Commit
44f1ae6
1 Parent(s): 70c2a12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -1,35 +1,37 @@
1
  import streamlit as st
2
- from transformers import VisionEncoderDecoderModel, ViTImageProcessor, GPT2Tokenizer, AutoImageProcessor
3
  import torch
4
  from PIL import Image
 
5
 
6
- # Load the model and tokenizer
7
  model = VisionEncoderDecoderModel.from_pretrained("ashok2216/vit-gpt2-image-captioning_COCO_FineTuned")
8
- # processor = ViTImageProcessor.from_pretrained("ashok2216/vit-gpt2-image-captioning_COCO_FineTuned")
9
- processor = AutoImageProcessor.from_pretrained("ashok2216/vit-gpt2-image-captioning_COCO_FineTuned")
10
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
11
 
12
- # Streamlit app title
13
- st.title("Image Captioning with ViT-GPT2 Model")
14
- st.write("Upload an image, and the model will generate a descriptive caption.")
 
 
 
 
 
 
 
15
 
16
- # File uploader for image input
17
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
18
 
19
  if uploaded_file is not None:
20
- # Load and display the uploaded image
21
  image = Image.open(uploaded_file)
22
  st.image(image, caption="Uploaded Image", use_column_width=True)
23
 
24
- # Preprocess the image for the model
25
- inputs = processor(images=image, return_tensors="pt")
26
- pixel_values = inputs.pixel_values
27
 
28
  # Generate the caption
29
  with st.spinner("Generating caption..."):
30
- output = model.generate(pixel_values)
31
  caption = tokenizer.decode(output[0], skip_special_tokens=True)
32
 
33
- # Display the generated caption
34
  st.success("Generated Caption:")
35
  st.write(caption)
 
1
  import streamlit as st
2
+ from transformers import VisionEncoderDecoderModel, GPT2Tokenizer
3
  import torch
4
  from PIL import Image
5
+ from torchvision import transforms
6
 
7
+ # Load model and tokenizer
8
  model = VisionEncoderDecoderModel.from_pretrained("ashok2216/vit-gpt2-image-captioning_COCO_FineTuned")
 
 
9
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
10
 
11
+ # Define manual preprocessing
12
+ preprocess = transforms.Compose([
13
+ transforms.Resize((224, 224)),
14
+ transforms.ToTensor(),
15
+ transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
16
+ ])
17
+
18
+ # Streamlit app setup
19
+ st.title("Image Captioning with ViT-GPT2")
20
+ st.write("Upload an image to generate a caption.")
21
 
 
22
  uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
23
 
24
  if uploaded_file is not None:
 
25
  image = Image.open(uploaded_file)
26
  st.image(image, caption="Uploaded Image", use_column_width=True)
27
 
28
+ # Preprocess the image manually
29
+ inputs = preprocess(image).unsqueeze(0) # Add batch dimension
 
30
 
31
  # Generate the caption
32
  with st.spinner("Generating caption..."):
33
+ output = model.generate(inputs)
34
  caption = tokenizer.decode(output[0], skip_special_tokens=True)
35
 
 
36
  st.success("Generated Caption:")
37
  st.write(caption)