PrabalPaul007's picture
Update app.py
f2f222b verified
#Libraries
import streamlit as st
from transformers import BlipForConditionalGeneration, AutoTokenizer
import torch
from PIL import Image
import torchvision.transforms as transforms
# Load the fine-tuned model and tokenizer.
# The device is chosen first so the model weights can be placed on it; inputs
# sent to a CUDA device would otherwise hit CPU-resident weights and fail.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BlipForConditionalGeneration.from_pretrained("PrabalPaul007/Prabal_AI_ML_stable")
model.to(device)
tokenizer = AutoTokenizer.from_pretrained("PrabalPaul007/Prabal_AI_ML_stable")
# Function to generate caption for the uploaded image
def generate_caption(image):
    """Generate a caption for an image using the module-level model.

    Args:
        image: A file path or binary file-like object that ``Image.open``
            accepts (the app passes the Streamlit uploaded file).

    Returns:
        The caption decoded from the first generated sequence, with special
        tokens removed.
    """
    # Preprocess the image: force three channels and a fixed input size.
    # NOTE(review): 224x224 and the ImageNet mean/std below are kept from the
    # original code; confirm they match the preprocessing used when the model
    # was fine-tuned.
    rgb_image = Image.open(image).convert("RGB")
    rgb_image = rgb_image.resize((224, 224))

    # Convert the image to a normalized tensor.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Add the batch dimension and place the input on the same device as the
    # model's weights, so the two can never end up on different devices.
    pixel_values = transform(rgb_image).unsqueeze(0).to(model.device)

    # Generate caption token ids and decode the first (only) sequence.
    output = model.generate(pixel_values=pixel_values)
    caption = tokenizer.decode(output[0], skip_special_tokens=True)
    return caption
# Streamlit app: upload an image, preview it, and display its generated caption.
st.title("Cartoon Caption Generator")
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", 'png'])
if uploaded_image is not None:
    st.image(uploaded_image, caption='Uploaded Image.', use_column_width=True)
    st.write("")
    try:
        # The spinner shows the progress message only while inference runs,
        # instead of leaving it on the page next to the finished caption.
        with st.spinner("Generating caption..."):
            caption = generate_caption(uploaded_image)
    except OSError as err:
        # Pillow reports undecodable or truncated image files as OSError
        # (including its UnidentifiedImageError subclass).
        st.error(f"Could not read the uploaded image: {err}")
    else:
        st.write("Caption:", caption)